diff --git a/docs_src/manuals/user_manual/halo_exchanges.hrst b/docs_src/manuals/user_manual/halo_exchanges.hrst index ed3a32d8c7..4bf531e4af 100644 --- a/docs_src/manuals/user_manual/halo_exchanges.hrst +++ b/docs_src/manuals/user_manual/halo_exchanges.hrst @@ -91,7 +91,7 @@ distribution like in :numref:`fig_dist1`. :scale: 40% Example data distribution among two processes. - + In this case the map between data and the processor grid is: .. code-block:: gridtools @@ -162,20 +162,21 @@ the name of arguments should be self-explanatory: GCL Communication Module ------------------------- -Now we are ready to describe the :term:`Halo Exchange` patterns objects. The first one is ``halo_exchange_dynamic_ut``. The ``ut`` suffix stands for ``uniform types``, meaning that the data fields that this object will manage must all store the same value types, that are declared at instantiation time. The type of the object is defined as in this example: +Now we are ready to describe the :term:`Halo Exchange` patterns objects. The first one is ``halo_exchange_dynamic_ut``. The ``ut`` suffix stands for ``uniform types``, meaning that the data fields that this object will manage must all store the same value types, that are declared at instantiation time. The domain decomposition goes up to three dimensions and the data to be exchanged is contained in 3-dimensional arrays (lower dimensions can be handled by setting the missing dimensions to 1). Being designed for three dimensional data, the layout maps have three elements (refer to :numref:`storage-info` for more information). + +The type of the object is defined as in this example: .. code-block:: gridtools using pattern_type = halo_exchange_dynamic_ut, - ayout_map<0, 1, 2>, value_type, 3, gcl_cpu>; + layout_map<0, 1, 2>, value_type, gcl_cpu>; The template arguments are: 1. the layout if the data; 2. 
the mapping between the data dimensions and processing :term:`Grid`, as described above (leave it as ``layout_map<0, 1, 2>`` if in doubt); 3. the type of the values to be exchanged; -4. the number of dimensions of the data/processing grid, and it has to be set to 3 (the GCL was designed to work with other dimensionalities, but the version in |GT| is a stripped down version waiting for the next more general interfaces); -5. the place where the data lives and for which the code is optimized. The options for this arguments are ``gcl_gpu``, ``gcl_cpu`` and ``gcl_mc`` (not supported yet). +4. the place where the data lives and for which the code is optimized. The options for this argument are ``gcl_gpu``, ``gcl_cpu`` and ``gcl_mc`` (not supported yet). The :term:`Halo Exchange` object can be instantiated as: @@ -235,11 +236,12 @@ An alternative pattern supporting different element types is: .. code-block:: gridtools - using pattern_type = halo_exchange_generic, 3, arch_type>; + using pattern_type = halo_exchange_generic, arch_type>; -Now the :term:`Layout Map` in the type is the mapping of dimensions to the -computing grid, 3 is the number of dimensions, and arch_type is either -``gcl_gpu``, ``gcl_cpu`` or ``gcl_mc`` (not supported yet). +Now the :term:`Layout Map` in the type is the mapping of dimensions to +the computing grid (the number of dimensions is 3, so the layout map +has three elements), and arch_type is either ``gcl_gpu``, ``gcl_cpu`` +or ``gcl_mc`` (not supported yet). 
The construction of the object is identical to the previous one, but the set-up somewhat more complex now, since we have to indicate the diff --git a/include/gridtools/communication/GCL.hpp b/include/gridtools/communication/GCL.hpp index 3437adc2cd..8ccb4c5012 100644 --- a/include/gridtools/communication/GCL.hpp +++ b/include/gridtools/communication/GCL.hpp @@ -25,13 +25,6 @@ #include "low_level/gcl_arch.hpp" -#ifdef GCL_GPU - -// workaround that uses host buffering to avoid bad sends for messages larger than 512 kB on Cray systems -//#define GCL_HOSTWORKAROUND - -#endif - #ifdef GCL_GPU #ifdef GCL_MULTI_STREAMS #ifdef GCL_USE_3 @@ -61,8 +54,6 @@ extern cudaStream_t XU_stream; namespace gridtools { - enum packing_version { version_mpi_pack = 0, version_datatype, version_manual }; - #ifdef GCL_MPI extern MPI_Comm GCL_WORLD; #else diff --git a/include/gridtools/communication/halo_exchange.hpp b/include/gridtools/communication/halo_exchange.hpp index dc059421f4..abe31ee55b 100644 --- a/include/gridtools/communication/halo_exchange.hpp +++ b/include/gridtools/communication/halo_exchange.hpp @@ -11,13 +11,10 @@ #include "../common/boollist.hpp" #include "low_level/Halo_Exchange_3D.hpp" -#include "low_level/Halo_Exchange_3D_DT.hpp" #include "low_level/proc_grids_3D.hpp" #include "high_level/descriptor_generic_manual.hpp" #include "high_level/descriptors.hpp" -#include "high_level/descriptors_dt.hpp" -#include "high_level/descriptors_dt_whole.hpp" #include "high_level/descriptors_fwd.hpp" #include "high_level/descriptors_manual_gpu.hpp" @@ -46,36 +43,6 @@ namespace gridtools { return CartComm; } - template - struct get_pattern; - - template - struct get_pattern<3, GT, 0> { - typedef Halo_Exchange_3D type; - }; - - template - struct get_pattern<3, GT, 1> { - typedef Halo_Exchange_3D_DT type; - }; - - template - struct get_pattern<3, GT, 2> { - typedef Halo_Exchange_3D type; - }; - - template - struct get_grid; - - template <> - struct get_grid<2> { - typedef 
MPI_3D_process_grid_t<2> type; - }; - - template <> - struct get_grid<3> { - typedef MPI_3D_process_grid_t<3> type; - }; } // namespace _impl /** @@ -214,7 +181,6 @@ namespace gridtools { template class halo_exchange_dynamic_ut { @@ -229,14 +195,14 @@ namespace gridtools { /** Type of the computin grid associated to the pattern */ - /*typedef typename _impl::get_grid::type grid_type;*/ - typedef GridType grid_type; - static const uint_t DIMS = GridType::ndims; + typedef MPI_3D_process_grid_t<3> grid_type; + + static constexpr int DIMS = 3; + /** - Type of the Level 3 pattern used. This is available only if the pattern uses a Level 3 pattern. - In the case the implementation is not using L3, the type is not available. + Type of the Level 3 pattern used. */ - typedef typename _impl::get_pattern::type pattern_type; + typedef Halo_Exchange_3D pattern_type; private: template @@ -250,7 +216,7 @@ namespace gridtools { return CartComm; } - typedef hndlr_dynamic_ut hd_t; + typedef hndlr_dynamic_ut hd_t; hd_t hd; @@ -289,15 +255,15 @@ namespace gridtools { /** constructor that takes the periodicity (mathich the \link boollist_concept \endlink concept, and the MPI CART - communicator in DIMS (specified as template argument to the - pattern) dimensions of the processing grid. the periodicity is - specified in the order chosen by the programmer for the data, - as in the rest of the application. It is up tp the - construnctor implementation to translate it into the right - order depending on the gridtools::layout_map passed to the class. + communicator in 3 dimensions of the processing grid. the + periodicity is specified in the order chosen by the + programmer for the data, as in the rest of the + application. It is up to the constructor implementation + to translate it into the right order depending on the + gridtools::layout_map passed to the class. 
\param[in] c Periodicity specification as in \link boollist_concept \endlink - \param[in] comm MPI CART communicator with dimension DIMS (specified as template argument to the pattern). + \param[in] comm MPI CART communicator with dimension 3 */ explicit halo_exchange_dynamic_ut(typename grid_type::period_type const &c, MPI_Comm const &comm) : hd(c.template permute(), comm) {} @@ -496,19 +462,6 @@ namespace gridtools { grid_type const &comm() const { return hd.comm(); } }; - template - struct pick_version; - - template <> - struct pick_version<2> { - static const int value = gridtools::version_mpi_pack; - }; - - template <> - struct pick_version<3> { - static const int value = gridtools::version_manual; - }; - /** This is the main class for the halo exchange pattern in the case in which the data pointers, data types, and shapes are not known @@ -534,25 +487,25 @@ namespace gridtools { \tparam GCL_ARCH Specification of the "architecture", that is the place where the data to be exchanged is. Possible coiches are defined in low_level/gcl_arch.h . */ - template ::value> + template class halo_exchange_generic_base { public: - // typedef typename reverse_map::type layout2proc_map; + static constexpr int DIMS = 3; /** Type of the computin grid associated to the pattern */ - typedef typename _impl::get_grid::type grid_type; + typedef MPI_3D_process_grid_t<3> grid_type; /** Type of the Level 3 pattern used. This is available only if the pattern uses a Level 3 pattern. In the case the implementation is not using L3, the type is not available. 
*/ - typedef typename _impl::get_pattern::type pattern_type; + typedef Halo_Exchange_3D pattern_type; private: - hndlr_generic hd; + hndlr_generic hd; public: /** constructor that takes the periodicity (matching the \link @@ -668,37 +621,17 @@ namespace gridtools { void wait() { hd.wait(); } }; - template - class halo_exchange_generic : public halo_exchange_generic_base { - - typedef halo_exchange_generic_base base_type; - // typedef typename layout_transform::type layout2proc_map; - - public: - typedef typename base_type::grid_type grid_type; - - typedef typename base_type::pattern_type pattern_type; - - template - struct traits { - static const int I = DIMS; - typedef empty_field base_field; - }; - - explicit halo_exchange_generic(typename grid_type::period_type const &c, MPI_Comm comm) : base_type(c, comm) {} - - explicit halo_exchange_generic(grid_type const &g) : base_type(g) {} - }; + template + class halo_exchange_generic; // different traits are needed - template - class halo_exchange_generic - : public halo_exchange_generic_base { + template + class halo_exchange_generic + : public halo_exchange_generic_base { - static const int version = version_manual; typedef gcl_cpu Gcl_Arch; - typedef halo_exchange_generic_base base_type; + typedef halo_exchange_generic_base base_type; public: typedef typename base_type::grid_type grid_type; @@ -707,8 +640,8 @@ namespace gridtools { template struct traits { - static const int I = DIMS; - typedef empty_field_no_dt base_field; + static const int I = 3; + typedef empty_field_no_dt base_field; }; explicit halo_exchange_generic(typename grid_type::period_type const &c, MPI_Comm comm) : base_type(c, comm) {} @@ -718,13 +651,12 @@ namespace gridtools { // different traits are needed template - class halo_exchange_generic - : public halo_exchange_generic_base { + class halo_exchange_generic + : public halo_exchange_generic_base { static const int DIMS = 3; - static const int version = version_manual; typedef gcl_gpu Gcl_Arch; 
- typedef halo_exchange_generic_base base_type; + typedef halo_exchange_generic_base base_type; public: typedef typename base_type::grid_type grid_type; diff --git a/include/gridtools/communication/high_level/descriptor_generic_manual.hpp b/include/gridtools/communication/high_level/descriptor_generic_manual.hpp index efba9006b3..78ab46a154 100644 --- a/include/gridtools/communication/high_level/descriptor_generic_manual.hpp +++ b/include/gridtools/communication/high_level/descriptor_generic_manual.hpp @@ -60,7 +60,7 @@ namespace gridtools { template - class hndlr_generic<3, HaloExch, proc_layout_abs, gcl_cpu, version_manual> : public descriptor_base { + class hndlr_generic : public descriptor_base { static const int DIMS = 3; gridtools::array::value> send_buffer; // One entry will not be used... gridtools::array::value> recv_buffer; @@ -381,7 +381,7 @@ namespace gridtools { #ifdef __CUDACC__ template - class hndlr_generic<3, HaloExch, proc_layout_abs, gcl_gpu, version_manual> : public descriptor_base { + class hndlr_generic : public descriptor_base { typedef gcl_gpu arch_type; static const int DIMS = 3; diff --git a/include/gridtools/communication/high_level/descriptors.hpp b/include/gridtools/communication/high_level/descriptors.hpp index f0d60d62d5..ad4b4a0492 100644 --- a/include/gridtools/communication/high_level/descriptors.hpp +++ b/include/gridtools/communication/high_level/descriptors.hpp @@ -53,10 +53,10 @@ namespace gridtools { \tparam DIMS the number of dimensions of the data field */ - template - class empty_field_no_dt : public empty_field_base { + class empty_field_no_dt : public empty_field_base { + static constexpr int DIMS = 3; - typedef empty_field_base base_type; + typedef empty_field_base base_type; public: /** @@ -69,31 +69,6 @@ namespace gridtools { const halo_descriptor *raw_array() const { return &(base_type::halos[0]); } - /** void pack(gridtools::array const& eta, iterator &it) - Pack the elements of a data field passed in input as 
iterator_in to be sent using the - iterator_out passed in that points to data buffers. At the end - the iterator_out points to the element next to the last inserted. In inout - the iterator_out points to the elements to be insered - - \param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink - \param[in] field_ptr iterator pointing to data field data - \param[in,out] it iterator pointing to the data. - */ - template - void pack(gridtools::array const &eta, iterator_in const *field_ptr, iterator_out *&it) const { - for (int j = base_type::halos[1].loop_low_bound_inside(eta[1]); - j <= base_type::halos[1].loop_high_bound_inside(eta[1]); - ++j) { - for (int i = base_type::halos[0].loop_low_bound_inside(eta[0]); - i <= base_type::halos[0].loop_high_bound_inside(eta[0]); - ++i) { - *(reinterpret_cast(it)) = field_ptr[gridtools::access( - i, j, base_type::halos[0].total_length(), base_type::halos[1].total_length())]; - reinterpret_cast(it) += sizeof(iterator_in); - } - } - } - template void pack(gridtools::array const &eta, iterator_in const *field_ptr, iterator_out *&it) const { // std::cout << "BASE ADDR IN PACK " << std::hex << reinterpret_cast(it) << std::dec << std::endl; @@ -104,14 +79,10 @@ namespace gridtools { // << "i=" << base_type::halos[0].loop_low_bound_inside(eta[0]) // << " to <= " << base_type::halos[0].loop_high_bound_inside(eta[0]) // << std::endl; - for (int k = base_type::halos[2].loop_low_bound_inside(eta[2]); - k <= base_type::halos[2].loop_high_bound_inside(eta[2]); - ++k) { - for (int j = base_type::halos[1].loop_low_bound_inside(eta[1]); - j <= base_type::halos[1].loop_high_bound_inside(eta[1]); + for (int k = halos[2].loop_low_bound_inside(eta[2]); k <= halos[2].loop_high_bound_inside(eta[2]); ++k) { + for (int j = halos[1].loop_low_bound_inside(eta[1]); j <= halos[1].loop_high_bound_inside(eta[1]); ++j) { - for (int i = base_type::halos[0].loop_low_bound_inside(eta[0]); - i <= 
base_type::halos[0].loop_high_bound_inside(eta[0]); + for (int i = halos[0].loop_low_bound_inside(eta[0]); i <= halos[0].loop_high_bound_inside(eta[0]); ++i) { // std::cout << " " @@ -128,46 +99,14 @@ namespace gridtools { // base_type::halos[2].total_length())] // << std::endl; - *(reinterpret_cast(it)) = field_ptr[gridtools::access(i, - j, - k, - base_type::halos[0].total_length(), - base_type::halos[1].total_length(), - base_type::halos[2].total_length())]; + *(reinterpret_cast(it)) = field_ptr[gridtools::access( + i, j, k, halos[0].total_length(), halos[1].total_length(), halos[2].total_length())]; reinterpret_cast(it) += sizeof(iterator_in); } } } } - /** void unpack(gridtools::array const& eta, iterator &it) - Unpack the elements into a data field passed in input as - iterator_in that have being received in data obtained by the - iterator_out passed in that points to data buffers. At the end - the iterator points to the element next to the last read element. In inout - the iterator points to the elements to be extracted from buffers and put - int the halo region. - - \param[in] eta the eta parameter as explained in \link MULTI_DIM_ACCESS \endlink of the sending neighbor - \param[in] field_ptr iterator pointing to data field data - \param[in,out] it iterator pointing to the data in buffers. 
- */ - template - void unpack(gridtools::array const &eta, iterator_in *field_ptr, iterator_out *&it) const { - for (int j = base_type::halos[1].loop_low_bound_outside(eta[1]); - j <= base_type::halos[1].loop_high_bound_outside(eta[1]); - ++j) { - for (int i = base_type::halos[0].loop_low_bound_outside(eta[0]); - i <= base_type::halos[0].loop_high_bound_outside(eta[0]); - ++i) { - field_ptr[gridtools::access( - i, j, base_type::halos[0].total_length(), base_type::halos[1].total_length())] = - *(reinterpret_cast(it)); - reinterpret_cast(it) += sizeof(iterator_in); - } - } - } - template void unpack(gridtools::array const &eta, iterator_in *field_ptr, iterator_out *&it) const { // std::cout << "k=" << base_type::halos[2].loop_low_bound_outside(eta[2]) @@ -177,14 +116,10 @@ namespace gridtools { // << "i=" << base_type::halos[0].loop_low_bound_outside(eta[0]) // << " to <= " << base_type::halos[0].loop_high_bound_outside(eta[0]) // << std::endl; - for (int k = base_type::halos[2].loop_low_bound_outside(eta[2]); - k <= base_type::halos[2].loop_high_bound_outside(eta[2]); - ++k) { - for (int j = base_type::halos[1].loop_low_bound_outside(eta[1]); - j <= base_type::halos[1].loop_high_bound_outside(eta[1]); + for (int k = halos[2].loop_low_bound_outside(eta[2]); k <= halos[2].loop_high_bound_outside(eta[2]); ++k) { + for (int j = halos[1].loop_low_bound_outside(eta[1]); j <= halos[1].loop_high_bound_outside(eta[1]); ++j) { - for (int i = base_type::halos[0].loop_low_bound_outside(eta[0]); - i <= base_type::halos[0].loop_high_bound_outside(eta[0]); + for (int i = halos[0].loop_low_bound_outside(eta[0]); i <= halos[0].loop_high_bound_outside(eta[0]); ++i) { // std::cout << " " // << i << ", " @@ -195,12 +130,9 @@ namespace gridtools { // << base_type::halos[2].total_length() << ", " // << *(reinterpret_cast(it)) // << std::endl; - field_ptr[gridtools::access(i, - j, - k, - base_type::halos[0].total_length(), - base_type::halos[1].total_length(), - 
base_type::halos[2].total_length())] = *(reinterpret_cast(it)); + field_ptr[gridtools::access( + i, j, k, halos[0].total_length(), halos[1].total_length(), halos[2].total_length())] = + *(reinterpret_cast(it)); reinterpret_cast(it) += sizeof(iterator_in); } } @@ -254,14 +186,6 @@ namespace gridtools { } }; - template - std::ostream &operator<<(std::ostream &s, empty_field_no_dt const &ef) { - s << "empty_field_no_dt "; - for (int i = 0; i < I; ++i) - s << ef.raw_array()[i] << ", "; - return s; - } - /** \class field_descriptor_no_dt Class containint the information about a data field (grid). It contains a pointer to the first element of the data field, @@ -280,11 +204,12 @@ namespace gridtools { \tparam DataType type of lements of the datafield \tparam DIMS the number of dimensions of the data field */ - template - class field_descriptor_no_dt : public empty_field_no_dt { + template + class field_descriptor_no_dt : public empty_field_no_dt { + static constexpr int DIMS = 3; DataType *fieldptr; // Pointer to the data field - typedef empty_field_no_dt base_type; + typedef empty_field_no_dt base_type; public: /** @@ -337,11 +262,12 @@ namespace gridtools { \tparam DIMS Number of dimensions of the grids. \tparam HaloExch Communication patter with halo exchange. */ - template + template class hndlr_descriptor_ut : public descriptor_base { - typedef hndlr_descriptor_ut this_type; + typedef hndlr_descriptor_ut this_type; + static constexpr int DIMS = 3; - std::vector> field; + std::vector> field; gridtools::array::value> send_buffer; // One entry will not be used... 
gridtools::array::value> recv_buffer; @@ -397,7 +323,7 @@ namespace gridtools { \return index of the field in the handler desctiptor */ size_t register_field(DataType *ptr) { - field.push_back(field_descriptor_no_dt(ptr)); + field.push_back(field_descriptor_no_dt(ptr)); return field.size() - 1; } @@ -423,7 +349,7 @@ namespace gridtools { int size() const { return field.size(); } - field_descriptor_no_dt const &data_field(int I) const { return field[I]; } + field_descriptor_no_dt const &data_field(int I) const { return field[I]; } /** Given the coordinates of a neighbor (2D), return the total number of elements to be sent to that neighbor associated with the handler of the manager. @@ -494,13 +420,13 @@ namespace gridtools { \tparam HaloExch Communication pattern with halo exchange. */ template - class hndlr_dynamic_ut : public descriptor_base { + class hndlr_dynamic_ut : public descriptor_base { static const int DIMS = GridType::ndims; - typedef hndlr_dynamic_ut this_type; + typedef hndlr_dynamic_ut this_type; public: - empty_field_no_dt halo; + empty_field_no_dt halo; private: gridtools::array::value> send_buffer; // One entry will not be used... diff --git a/include/gridtools/communication/high_level/descriptors_dt.hpp b/include/gridtools/communication/high_level/descriptors_dt.hpp deleted file mode 100644 index a423bfcfe8..0000000000 --- a/include/gridtools/communication/high_level/descriptors_dt.hpp +++ /dev/null @@ -1,867 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. 
- * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include "../../common/array.hpp" -#include "../../common/boollist.hpp" -#include "../../common/gt_assert.hpp" -#include "../../common/layout_map_metafunctions.hpp" -#include "../../common/make_array.hpp" -#include "../../common/ndloops.hpp" -#include "../../common/numerics.hpp" -#include "../low_level/Halo_Exchange_3D.hpp" -#include "../low_level/data_types_mapping.hpp" -#include "../low_level/proc_grids_3D.hpp" -#include "empty_field_base.hpp" -#include "gcl_parameters.hpp" -#include - -#include "access.hpp" -#include "descriptor_base.hpp" -#include "descriptors_fwd.hpp" -#include "helpers_impl.hpp" -#include -#include -#include -#include -#include -#include - -namespace gridtools { - - /** \class empty_field - Class containint the information about a data field (grid). - It doe not contains any reference to actual data of the field, - it only describes the fields though the halo descriptions. - The number of dimensions as a template argument and the size of the - first dimension, the size of the non-halo data field, - the halo width before and after the actual data, then the same for the - second dimension, the third, etc. This information is encoded in - halo_descriptor. A dimension of the field is described as: - \code - |-----|------|---------------|---------|----| - | pad0|minus | length | plus |pad1| - ^begin ^end - | total_length | - \endcode - - \tparam DataType Type of elements contained in data arrays - \tparam DIMS the number of dimensions of the data field - - */ - template - class empty_field : public empty_field_base { - typedef empty_field_base base_type; - - public: - /** - Constructor that receive the pointer to the data. This is explicit and - must then be called. 
- */ - explicit empty_field() {} - - const halo_descriptor *raw_array() const { return &(base_type::halos[0]); } - - /** void pack(gridtools::array const& eta, iterator &it) - Pack the elements of a data field passed in input as iterator_in to be sent using the - iterator_out passed in that points to data buffers. At the end - the iterator_out points to the element next to the last inserted. In inout - the iterator_out points to the elements to be insered - - \param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink - \param[in] field_ptr iterator pointing to data field data - \param[in,out] it iterator pointing to the data. - */ - template - void pack(gridtools::array const &eta, iterator_in const &field_ptr, iterator_out &it) const { - if (base_type::MPDT_INSIDE[_impl::neigh_idx(eta)].second) { - int ss2; - MPI_Pack_size(1, base_type::MPDT_INSIDE[_impl::neigh_idx(eta)].first, gridtools::GCL_WORLD, &ss2); - - int I = 0; - MPI_Pack(field_ptr, - 1, - base_type::MPDT_INSIDE[_impl::neigh_idx(eta)].first, - it, - ss2, - &I, - gridtools::GCL_WORLD); - - it += - I / - sizeof(typename boost::remove_pointer::type>::type); - } else { - // nothing here - } - } - - /** void unpack(gridtools::array const& eta, iterator &it) - Unpack the elements into a data field passed in input as - iterator_in that have being received in data obtained by the - iterator_out passed in that points to data buffers. At the end - the iterator points to the element next to the last read element. In inout - the iterator points to the elements to be extracted from buffers and put - int the halo region. - - \param[in] eta the eta parameter as explained in \link MULTI_DIM_ACCESS \endlink of the sending neighbor - \param[in] field_ptr iterator pointing to data field data - \param[in,out] it iterator pointing to the data in buffers. 
- */ - template - void unpack(gridtools::array const &eta, iterator_in const &field_ptr, iterator_out &it) const { - if (base_type::MPDT_OUTSIDE[_impl::neigh_idx(eta)].second) { - int I = 0; - - MPI_Unpack(it, - base_type::recv_buffer_size(eta) * sizeof(DataType), - &I, - field_ptr, - 1, - base_type::MPDT_OUTSIDE[_impl::neigh_idx(eta)].first, - gridtools::GCL_WORLD); - - it += - I / - sizeof(typename boost::remove_pointer::type>::type); - } else { - // nothing here - } - } - - template - void pack_all(gridtools::array const &, iterator &it) const {} - - /** - This method takes a tuple eta identifiyng a neighbor \link MULTI_DIM_ACCESS \endlink - and a list of data fields and pack all the data corresponding - to the halo described by the class. The data is packed starting at - position pointed by iterator and the iterator will point to the next free - position at the end of the operation. - - \param[in] eta the eta parameter as explained in \link MULTI_DIM_ACCESS \endlink of the receiving neighbor - \param[in,out] it iterator pointing to storage area where data is packed - \param[in] field the first data field to be processed - \param[in] args the rest of the list of data fields to be packed (they may have different datatypes). - */ - template - void pack_all( - gridtools::array const &eta, iterator &it, FIRST const &field, const FIELDS &... args) const { - pack(eta, field, it); - pack_all(eta, it, args...); - } - - template - void unpack_all(gridtools::array const &, iterator &it) const {} - - /** - This method takes a tuple eta identifiyng a neighbor \link MULTI_DIM_ACCESS \endlink - and a list of data fields and pack all the data corresponding - to the halo described by the class. The data is packed starting at - position pointed by iterator and the iterator will point to the next free - position at the end of the operation. 
- - \param[in] eta the eta parameter as explained in \link MULTI_DIM_ACCESS \endlink of the sending neighbor - \param[in,out] it iterator pointing to the data to be unpacked - \param[in] field the first data field to be processed - \param[in] args the rest of the list of data fields where data has to be unpacked into (they may have - different - datatypes). - */ - template - void unpack_all( - gridtools::array const &eta, iterator &it, FIRST const &field, const FIELDS &... args) const { - unpack(eta, field, it); - unpack_all(eta, it, args...); - } - }; - - template - std::ostream &operator<<(std::ostream &s, empty_field const &ef) { - s << "empty_field "; - for (int i = 0; i < I; ++i) - s << ef.raw_array()[i] << ", "; - return s; - } - - /** \class field_descriptor - Class containint the information about a data field (grid). - It contains a pointer to the first element of the data field, - the number of dimensions as a template argument and the size of the - first dimension, the size of the non-halo data field, - the halo width before and after the actual data, then the same for the - second dimension, the third, etc. This information is encoded in - halo_descriptor. A dimension of the field is described as: - \code - |-----|------|---------------|---------|----| - | pad0|minus | length | plus |pad1| - ^begin ^end - | total_length | - \endcode - - \tparam DataType type of lements of the datafield - \tparam DIMS the number of dimensions of the data field - */ - template - class field_descriptor : public empty_field { - DataType *fieldptr; // Pointer to the data field - - typedef empty_field base_type; - - public: - /** - Constructor that receive the pointer to the data. This is explicit and - must then be called. - \param[in] _fp DataType* pointer to the data field - */ - explicit field_descriptor(DataType *_fp) : fieldptr(_fp) {} - - /** void pack(gridtools::array const& eta, iterator &it) - Pack the elements to be sent using the iterator passed in. 
At the end - the iterator points to the element next to the last inserted. In inout - the iterator points to the elements to be insered - - \param[in] eta the eta parameter as indicated in \link MULTI_DIM_ACCESS \endlink - \param[in,out] it iterator pointing to the data. - */ - template - void pack(gridtools::array const &eta, iterator &it) const { - base_type::pack(eta, fieldptr, it); - } - - /** void unpack(gridtools::array const& eta, iterator &it) - Unpack the elements received using the iterator passed in.. At the end - the iterator points to the element next to the last read element. In inout - the iterator points to the elements to be extracted from buffers and put - int the halo region. - - \param[in] eta the eta parameter as explained in \link MULTI_DIM_ACCESS \endlink of the sending neighbor - \param[in,out] it iterator pointing to the data in buffers. - */ - template - void unpack(gridtools::array const &eta, iterator &it) const { - base_type::unpack(eta, fieldptr, it); - } - }; - - /** - Class containing the description of one halo and a communication - pattern. A communication is triggered when a list of data - fields are passed to the exchange functions, when the data - according to the halo descriptors are echanged. This class is - needed when the addresses and the number of the data fields - changes dynamically but the sizes are constant. Data elements - for each hndlr_dynamic_ut must be the same. - - \tparam DataType Type of the elements in data arrays - \tparam DIMS Number of dimensions of the grids. - \tparam HaloExch Communication patter with halo exchange. 
- \tparam proc_layout Map between dimensions in increasing-stride order and processor grid dimensions - \tparam Gcl_Arch Specification of architecture used to indicate where the data is L3/include/gcl_arch.h file - reference - */ - template ::type, - typename Gcl_Arch = gridtools::gcl_cpu, - int VERSION = 0> - class hndlr_dynamic_ut : public descriptor_base { - typedef hndlr_dynamic_ut this_type; - static const int DIMS = GridType::ndims; - - public: - empty_field halo; - - private: - gridtools::array::value> send_buffer; // One entry will not be used... - gridtools::array::value> recv_buffer; - - public: - typedef descriptor_base base_type; - typedef base_type pattern_type; - - /** Architecture type - */ - typedef Gcl_Arch arch_type; - - /** - Type of the computin grid associated to the pattern - */ - typedef typename pattern_type::grid_type grid_type; - - /** - Type of the translation used to map dimensions to buffer addresses - */ - typedef translate_t::type> translate; - - private: - hndlr_dynamic_ut(hndlr_dynamic_ut const &) {} - - public: -#ifdef GCL_TRACE - void set_pattern_tag(int tag) { base_type::m_haloexch.set_pattern_tag(tag); }; -#endif - - /** - \brief Constructor - - \param[in] c The object of the class used to specify periodicity in each dimension - \param[in] comm MPI communicator (typically MPI_Comm_world) - \param[in] dimensions array of dimensions of the process grid - */ - template - explicit hndlr_dynamic_ut(typename grid_type::period_type const &c, MPI_Comm comm, Array const *dimensions) - : base_type(c, comm, dimensions), halo(), send_buffer{nullptr}, recv_buffer{nullptr} {} - - ~hndlr_dynamic_ut() { -#ifdef GCL_CHECK_DESTRUCTOR - std::cout << "Destructor " << __FILE__ << ":" << __LINE__ << std::endl; -#endif - - destroy::doit(*this); - } - - template - struct destroy; - - template - struct destroy<3, N> { - template - static void doit(T &descriptor) { - for (int i = -1; i <= 1; ++i) - for (int j = -1; j <= 1; ++j) - for (int k = -1; k <= 1; 
++k) { - _impl::gcl_alloc::free(descriptor.send_buffer[translate()(i, j, k)]); - _impl::gcl_alloc::free(descriptor.recv_buffer[translate()(i, j, k)]); - } - } - }; - - /** - Constructor - - \param[in] c The object of the class used to specify periodicity in each dimension - \param[in] _P Number of processors the pattern is running on (numbered from 0 to _P-1 - \param[in] _pid Integer identifier of the process calling the constructor - */ - explicit hndlr_dynamic_ut(typename grid_type::period_type const &c, int _P, int _pid) - : base_type(grid_type(c, _P, _pid)), halo() {} - - /** - Constructor - - \param[in] g A processor grid that will execute the pattern - */ - explicit hndlr_dynamic_ut(grid_type const &g) : halo(), base_type(g) {} - - /** - Function to setup internal data structures for data exchange and preparing eventual underlying layers - - \param max_fields_n Maximum number of data fields that will be passed to the communication functions - */ - void setup(int max_fields_n) { - halo.setup(); - _impl::allocation_service()(this, max_fields_n); - } - - /** - Function to pack data to be sent - - \param[in] _fields data fields to be packed - */ - template - void pack(const FIELDS &... _fields) const { - pack_dims()(*this, _fields...); - } - - /** - Function to unpack received data - - \param[in] _fields data fields where to unpack data - */ - template - void unpack(const FIELDS &... 
_fields) const { - unpack_dims()(*this, _fields...); - } - - /** - Function to unpack received data - - \param[in] fields vector with data fields pointers to be packed from - */ - void pack(std::vector const &fields) { pack_vector_dims()(*this, fields); } - - /** - Function to unpack received data - - \param[in] fields vector with data fields pointers to be unpacked into - */ - void unpack(std::vector const &fields) { unpack_vector_dims()(*this, fields); } - - /// Utilities - - // FRIENDING - friend struct _impl::allocation_service; - - private: - template - struct pack_dims {}; - - template - struct pack_dims<3, dummy> { - template - void operator()(const T &hm, const FIELDS &... _fields) const { -#pragma omp parallel for schedule(dynamic, 1) collapse(3) - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - typedef proc_layout map_type; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && - (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - DataType *it = &(hm.send_buffer[translate()(ii, jj, kk)][0]); - hm.halo.pack_all(make_array(ii, jj, kk), it, _fields...); - } - } - } - } - } - }; - - template - struct unpack_dims {}; - - template - struct unpack_dims<3, dummy> { - template - void operator()(const T &hm, const FIELDS &... 
_fields) const { -#pragma omp parallel for schedule(dynamic, 1) collapse(3) - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - typedef proc_layout map_type; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && - (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - DataType *it = &(hm.recv_buffer[translate()(ii, jj, kk)][0]); - hm.halo.unpack_all(make_array(ii, jj, kk), it, _fields...); - } - } - } - } - } - }; - - template - struct pack_vector_dims {}; - - template - struct pack_vector_dims<3, dummy> { - template - void operator()(const T &hm, std::vector const &fields) const { -#pragma omp parallel for schedule(dynamic, 1) collapse(3) - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - typedef proc_layout map_type; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && - (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - DataType *it = &(hm.send_buffer[translate()(ii, jj, kk)][0]); - for (size_t i = 0; i < fields.size(); ++i) { - hm.halo.pack(make_array(ii, jj, kk), fields[i], it); - } - } - } - } - } - } - }; - - template - struct unpack_vector_dims {}; - - template - struct unpack_vector_dims<3, dummy> { - template - void operator()(const T &hm, std::vector const &fields) const { -#pragma omp parallel for schedule(dynamic, 1) collapse(3) - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - typedef proc_layout map_type; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = 
pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && - (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - DataType *it = &(hm.recv_buffer[translate()(ii, jj, kk)][0]); - for (size_t i = 0; i < fields.size(); ++i) { - hm.halo.unpack(make_array(ii, jj, kk), fields[i], it); - } - } - } - } - } - } - }; - }; - - /** \class handler_manager_ut - Handler Manager is a class that keeps ona hndlr_descriptor and provide the - handlers for the library, the size information, the buffers allocation, and the - exchange calls. - - \tparam DataType type of the elements of the data fields associated to the handler. - \tparam DIMS Number of dimensions of the grids. - \tparam HaloExch pattern type used in communication. - */ - template - class handler_manager_ut { - hndlr_descriptor_ut *h; - - public: - /** - Creates a handler descriptor with a given argument. - - \tparam ARG Type of the input argument - \param[in] c The object of the class used to specify periodicity in each dimension - \param arg Value of the argument - */ - template - hndlr_descriptor_ut &create_handler( - typename HaloExch::grid_type::period_type const &c, ARG const &arg) { - h = new hndlr_descriptor_ut(c, arg); - return *h; - } - - /** - Creates a handler descriptor with a given two arguments. - - \tparam ARG0 Type of the input argument - \tparam ARG1 Type of the input argument - \param[in] c The object of the class used to specify periodicity in each dimension - \param arg0 Value of the argument - \param arg1 Value of the argument - */ - template - hndlr_descriptor_ut &create_handler( - typename HaloExch::grid_type::period_type const &c, ARG0 const &arg0, ARG1 const &arg1) { - h = new hndlr_descriptor_ut(c, arg0, arg1); - return *h; - } - - /** - Destroy the handler created by create_handler which cannot be reused - after this function returns. 
- */ - void destroy_handler(hndlr_descriptor_ut &h) { delete (&h); } - }; - - template - class hndlr_generic<3, HaloExch, proc_layout_abs, Gcl_Arch, version> : public descriptor_base { - static const int DIMS = 3; - gridtools::array::value> send_buffer; // One entry will not be used... - gridtools::array::value> recv_buffer; - gridtools::array::value> send_buffer_size; // One entry will not be used... - gridtools::array::value> recv_buffer_size; - - typedef Gcl_Arch arch_type; - - public: - typedef descriptor_base base_type; - typedef typename base_type::pattern_type pattern_type; - - /** - Type of the computin grid associated to the pattern - */ - typedef typename pattern_type::grid_type grid_type; - - /** - Type of the translation used to map dimensions to buffer addresses - */ - typedef translate_t::type> translate; - - hndlr_generic(grid_type const &g) : base_type(g) {} - - ~hndlr_generic() { -#ifdef GCL_CHECK_DESTRUCTOR - std::cout << "Destructor " << __FILE__ << ":" << __LINE__ << std::endl; -#endif - - for (int i = -1; i <= 1; ++i) - for (int j = -1; j <= 1; ++j) - for (int k = -1; k <= 1; ++k) { - _impl::gcl_alloc::free(send_buffer[translate()(i, j, k)]); - _impl::gcl_alloc::free(recv_buffer[translate()(i, j, k)]); - } - } - - /** - Setup function, in this version, takes tree parameters to - compute internal buffers and sizes. It takes a field on the fly - struct, which requires Datatype and layout map template - arguments that are inferred, so the user is not aware of them. 
- - \tparam DataType This type is inferred by halo_example paramter - \tparam layomap This type is inferred by halo_example paramter - - \param[in] max_fields_n Maximum number of grids used in a computation - \param[in] halo_example The (at least) maximal grid that is goinf to be used - \param[in] typesize In case the DataType of the halo_example is not the same as the maximum data type used in - the computation, this parameter can be given - */ - template class traits> - void setup(int max_fields_n, - field_on_the_fly const &halo_example, - int typesize = sizeof(DataType)) { - typedef typename field_on_the_fly::inner_layoutmap layomap; - gridtools::array eta; - for (int i = -1; i <= 1; ++i) { - for (int j = -1; j <= 1; ++j) { - for (int k = -1; k <= 1; ++k) { - if (i != 0 || j != 0 || k != 0) { - eta[0] = i; - eta[1] = j; - eta[2] = k; - int S = 1; - S = halo_example.send_buffer_size(eta); - int R = 1; - R = halo_example.recv_buffer_size(eta); - send_buffer[translate()(i, j, k)] = - _impl::gcl_alloc::alloc(S * max_fields_n * typesize); - recv_buffer[translate()(i, j, k)] = - _impl::gcl_alloc::alloc(R * max_fields_n * typesize); - send_buffer_size[translate()(i, j, k)] = (S * max_fields_n * typesize); - recv_buffer_size[translate()(i, j, k)] = (R * max_fields_n * typesize); - - // std::cout << halo_example << std::endl; - // printf("%d %d %d -> send %d, recv %d\n", i,j,k, send_buffer_size[translate()(i,j,k)], - // recv_buffer_size[translate()(i,j,k)]); - - typedef typename layout_transform::type proc_layout; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_send_to_buffer( - &(send_buffer[translate()(i, j, k)][0]), S * max_fields_n * typesize, i_P, j_P, k_P); - - base_type::m_haloexch.register_receive_from_buffer( - &(recv_buffer[translate()(i, j, k)][0]), R * max_fields_n * typesize, i_P, j_P, k_P); - } - } - } - } - } - - 
/** - Setup function, in this version, takes a single parameter with - an array of sizes to be associated with the halos. - - \tparam DataType This type is inferred by halo_example paramter - \tparam layomap This type is inferred by halo_example paramter - - \param[in] buffer_size_list Array (gridtools::array) with the sizes of the buffers associated with the halos. - */ - template - void setup(gridtools::array::value> const &buffer_size_list) { - for (int i = -1; i <= 1; ++i) { - for (int j = -1; j <= 1; ++j) { - for (int k = -1; k <= 1; ++k) { - if (i != 0 || j != 0 || k != 0) { - send_buffer[translate()(i, j, k)] = - _impl::gcl_alloc::alloc(buffer_size_list[translate()(i, j, k)]); - recv_buffer[translate()(i, j, k)] = - _impl::gcl_alloc::alloc(buffer_size_list[translate()(i, j, k)]); - send_buffer_size[translate()(i, j, k)] = (buffer_size_list[translate()(i, j, k)]); - recv_buffer_size[translate()(i, j, k)] = (buffer_size_list[translate()(i, j, k)]); - - typedef typename layout_transform::type proc_layout; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_send_to_buffer(&(send_buffer[translate()(i, j, k)][0]), - buffer_size_list[translate()(i, j, k)], - i_P, - j_P, - k_P); - - base_type::m_haloexch.register_receive_from_buffer(&(recv_buffer[translate()(i, j, k)][0]), - buffer_size_list[translate()(i, j, k)], - i_P, - j_P, - k_P); - } - } - } - } - } - - template - void pack(const FIELDS &... _fields) const { - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - char *it = reinterpret_cast(&(send_buffer[translate()(ii, jj, kk)][0])); - pack_dims()(*this, /*make_array(*/ ii, jj, kk /*)*/, it, _fields...); - } - } - } - } - - template - void unpack(const FIELDS &... 
_fields) const { - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - char *it = reinterpret_cast(&(recv_buffer[translate()(ii, jj, kk)][0])); - unpack_dims()(*this, ii, jj, kk, it, _fields...); - } - } - } - } - - /** - Function to unpack received data - - \tparam array_of_fotf this should be an array of field_on_the_fly - \param[in] fields vector with fields on the fly - */ - template class T3> - void pack(std::vector> const &fields) { - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - char *it = reinterpret_cast(&(send_buffer[translate()(ii, jj, kk)][0])); - pack_vector_dims()(*this, ii, jj, kk, it, fields); - } - } - } - } - - /** - Function to unpack received data - - \tparam array_of_fotf this should be an array of field_on_the_fly - \param[in] fields vector with fields on the fly - */ - template class T3> - void unpack(std::vector> const &fields) { - for (int ii = -1; ii <= 1; ++ii) { - for (int jj = -1; jj <= 1; ++jj) { - for (int kk = -1; kk <= 1; ++kk) { - char *it = reinterpret_cast(&(recv_buffer[translate()(ii, jj, kk)][0])); - unpack_vector_dims()(*this, ii, jj, kk, it, fields); - } - } - } - } - - private: - template - struct pack_dims {}; - - template - struct pack_dims<3, dummy> { - - template - void operator()(const T &, int, int, int, iterator &) const {} - - template - void operator()( - const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... 
_fields) - const { - typedef typename layout_transform::type proc_layout; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - first.pack(make_array(ii, jj, kk), first.ptr, it); - operator()(hm, ii, jj, kk, it, _fields...); - } - } - }; - - template - struct unpack_dims {}; - - template - struct unpack_dims<3, dummy> { - - template - void operator()(const T &, int, int, int, iterator &) const {} - - template - void operator()( - const T &hm, int ii, int jj, int kk, iterator &it, FIRST const &first, const FIELDS &... _fields) - const { - typedef typename layout_transform::type proc_layout; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - first.unpack(make_array(ii, jj, kk), first.ptr, it); - operator()(hm, ii, jj, kk, it, _fields...); - } - } - }; - - template - struct pack_vector_dims {}; - - template - struct pack_vector_dims<3, dummy> { - - template - void operator()(const T &hm, int ii, int jj, int kk, iterator &it, array_of_fotf const &_fields) const { - typedef typename layout_transform::type proc_layout; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - for (unsigned int fi = 0; fi < _fields.size(); ++fi) { - _fields[fi].pack(make_array(ii, jj, kk), _fields[fi].ptr, it); - } - } - } - }; - - template - struct unpack_vector_dims {}; - - template - struct unpack_vector_dims<3, dummy> { - - template - void 
operator()(const T &hm, int ii, int jj, int kk, iterator &it, array_of_fotf const &_fields) const { - typedef typename layout_transform::type proc_layout; - const int ii_P = pack_get_elem()>::apply(ii, jj, kk); - const int jj_P = pack_get_elem()>::apply(ii, jj, kk); - const int kk_P = pack_get_elem()>::apply(ii, jj, kk); - if ((ii != 0 || jj != 0 || kk != 0) && (hm.pattern().proc_grid().proc(ii_P, jj_P, kk_P) != -1)) { - for (unsigned int fi = 0; fi < _fields.size(); ++fi) { - _fields[fi].unpack(make_array(ii, jj, kk), _fields[fi].ptr, it); - } - } - } - }; - }; - -} // namespace gridtools diff --git a/include/gridtools/communication/high_level/descriptors_dt_whole.hpp b/include/gridtools/communication/high_level/descriptors_dt_whole.hpp deleted file mode 100644 index 36f160c30e..0000000000 --- a/include/gridtools/communication/high_level/descriptors_dt_whole.hpp +++ /dev/null @@ -1,249 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include "../../common/array.hpp" -#include "../../common/gt_assert.hpp" -#include "../../common/make_array.hpp" -#include - -#include "../../common/boollist.hpp" -#include "../../common/halo_descriptor.hpp" -#include "../../common/layout_map_metafunctions.hpp" -#include "../../common/ndloops.hpp" -#include "../low_level/data_types_mapping.hpp" -#include "gcl_parameters.hpp" - -#include "../../common/numerics.hpp" -#include "descriptor_base.hpp" -#include "descriptors_fwd.hpp" -#include "helpers_impl.hpp" -#include -#include -#include -#include -#include -#include -#include - -namespace gridtools { - - /** - Class containing the description of one halo and a communication - pattern. A communication is triggered when a list of data - fields are passed to the exchange functions, when the data - according to the halo descriptors are echanged. 
This class is - needed when the addresses and the number of the data fields - changes dynamically but the sizes are constant. Data elements - for each hndlr_dynamic_ut must be the same. - - \tparam DataType Type of the elements in data arrays - \tparam DIMS Number of dimensions of the grids. - \tparam HaloExch Communication patter with halo exchange. - \tparam proc_layout Map between dimensions in increasing-stride order and processor grid dimensions - \tparam Gcl_Arch Specification of architecture used to indicate where the data is L3/include/gcl_arch.h file - reference - */ - template class GridType> - class hndlr_dynamic_ut, Halo_Exchange_3D_DT, proc_layout, Gcl_Arch, 1> - : public descriptor_base> { - static const int DIMS = 3; - static const int MaxFields = 20; - typedef descriptor_base> base_type; - typedef typename base_type::pattern_type HaloExch; - typedef hndlr_dynamic_ut, HaloExch, proc_layout, Gcl_Arch, 1> this_type; - typedef array::value> MPDT_t; - - // typedef array, _impl::static_pow3::value> MPDT_array_t; - // MPDT_array_t MPDT_array_in, MPDT_array_out; - public: - empty_field halo; - - private: - gridtools::array offsets; // 20 is the max number of fields passed in - gridtools::array counts; // 20 is the max number of fields passed in - - MPDT_t MPDT_INSIDE, MPDT_OUTSIDE; - - public: - typedef typename base_type::pattern_type pattern_type; - - /** - Type of the computin grid associated to the pattern - */ - typedef typename base_type::grid_type grid_type; - - /** - Type of the translation used to map dimensions to buffer addresses - */ - typedef translate_t::type> translate; - - private: - hndlr_dynamic_ut(hndlr_dynamic_ut const &) {} - - public: - /** - Constructor - - \param[in] c The object of the class used to specify periodicity in each dimension - \param[in] comm MPI communicator (typically MPI_Comm_world) - \param[in] dimensions array of dimensions of the process grid - */ - template - explicit hndlr_dynamic_ut(typename 
grid_type::period_type const &c, MPI_Comm comm, Array const *dimensions) - : base_type(c, comm, dimensions), halo() { - for (int i = 0; i < MaxFields; ++i) - counts[i] = 1; - } - - /** - Constructor - - \param[in] c The object of the class used to specify periodicity in each dimension - \param[in] _P Number of processors the pattern is running on (numbered from 0 to _P-1 - \param[in] _pid Integer identifier of the process calling the constructor - */ - explicit hndlr_dynamic_ut(typename grid_type::period_type const &c, int _P, int _pid) - : base_type(c, _P, _pid), halo() { - for (int i = 0; i < MaxFields; ++i) - counts[i] = 1; - } - - /** - Constructor - - \param[in] g A processor grid that will execute the pattern - */ - explicit hndlr_dynamic_ut(grid_type const &g) : base_type(g), halo() { - for (int i = 0; i < MaxFields; ++i) - counts[i] = 1; - } - - /** - Function to setup internal data structures for data exchange and preparing eventual underlying layers - - \param * Maximum number of data fields that will be passed to the communication functions - */ - void setup(int) { halo.setup(); } - - /** - Function to unpack received data - - \param[in] fields vector with data fields pointers to be packed from - */ - void pack(std::vector const &fields) { - // Create an MPI data types with data types of the different fields. 
- for (unsigned int k = 0; k < fields.size(); ++k) { - offsets[k] = reinterpret_cast(fields[k]) - reinterpret_cast(fields[0]); - } - - for (int i = -1; i <= 1; ++i) { - for (int j = -1; j <= 1; ++j) { - for (int k = -1; k <= 1; ++k) { - if (i != 0 || j != 0 || k != 0) { - array eta = make_array(i, j, k); - if (halo.mpdt_inside(eta).second) { - MPI_Type_create_hindexed(fields.size(), - &(counts[0]), - &(offsets[0]), - halo.mpdt_inside(eta).first, - &(MPDT_INSIDE[_impl::neigh_idx(eta)])); - MPI_Type_commit(&MPDT_INSIDE[_impl::neigh_idx(eta)]); - } - if (halo.mpdt_outside(eta).second) { - MPI_Type_create_hindexed(fields.size(), - &(counts[0]), - &(offsets[0]), - halo.mpdt_outside(eta).first, - &(MPDT_OUTSIDE[_impl::neigh_idx(eta)])); - MPI_Type_commit(&MPDT_OUTSIDE[_impl::neigh_idx(eta)]); - } - } - } - } - } - - for (int i = -1; i <= 1; ++i) { - for (int j = -1; j <= 1; ++j) { - for (int k = -1; k <= 1; ++k) { - if (i != 0 || j != 0 || k != 0) { - array eta = make_array(i, j, k); - if (halo.mpdt_inside(eta).second) { - typedef translate_t<3, proc_layout> translate_P; - typedef typename translate_P::map_type map_type; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_send_to_buffer( - fields[0], MPDT_INSIDE[_impl::neigh_idx(eta)], 1, i_P, j_P, k_P); - - } else { - typedef translate_t<3, proc_layout> translate_P; - typedef typename translate_P::map_type map_type; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_send_to_buffer(nullptr, MPI_INT, 0, i_P, j_P, k_P); - } - - if (halo.mpdt_outside(eta).second) { - typedef translate_t<3, proc_layout> translate_P; - typedef typename translate_P::map_type map_type; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = 
pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_receive_from_buffer( - fields[0], MPDT_OUTSIDE[_impl::neigh_idx(eta)], 1, i_P, j_P, k_P); - } else { - typedef translate_t<3, proc_layout> translate_P; - typedef typename translate_P::map_type map_type; - const int i_P = pack_get_elem()>::apply(i, j, k); - const int j_P = pack_get_elem()>::apply(i, j, k); - const int k_P = pack_get_elem()>::apply(i, j, k); - - base_type::m_haloexch.register_receive_from_buffer(nullptr, MPI_INT, 0, i_P, j_P, k_P); - } - } - } - } - } - } - /** - Function to unpack received data - - \param[in] fields vector with data fields pointers to be unpacked into - */ - void unpack(std::vector const &fields) { - for (int i = -1; i <= 1; ++i) { - for (int j = -1; j <= 1; ++j) { - for (int k = -1; k <= 1; ++k) { - if (i != 0 || j != 0 || k != 0) { - array eta = make_array(i, j, k); - if (halo.mpdt_inside(eta).second) { - MPI_Type_free(&MPDT_INSIDE[_impl::neigh_idx(eta)]); - } - if (halo.mpdt_outside(eta).second) { - MPI_Type_free(&MPDT_OUTSIDE[_impl::neigh_idx(eta)]); - } - } - } - } - } - } - }; - -} // namespace gridtools diff --git a/include/gridtools/communication/high_level/descriptors_fwd.hpp b/include/gridtools/communication/high_level/descriptors_fwd.hpp index 89e3baba91..516139ca39 100644 --- a/include/gridtools/communication/high_level/descriptors_fwd.hpp +++ b/include/gridtools/communication/high_level/descriptors_fwd.hpp @@ -10,22 +10,16 @@ #pragma once namespace gridtools { - template + template class hndlr_descriptor_ut; - template + template class hndlr_dynamic_ut; - template ::type, - typename Gcl_Arch = gcl_cpu, - int = version_mpi_pack> + template class hndlr_generic; template class traits> struct field_on_the_fly; - template - class hndlr_generic; } // namespace gridtools diff --git a/include/gridtools/communication/high_level/descriptors_manual_gpu.hpp 
b/include/gridtools/communication/high_level/descriptors_manual_gpu.hpp index 0d20a67fe4..3efd11ae7e 100644 --- a/include/gridtools/communication/high_level/descriptors_manual_gpu.hpp +++ b/include/gridtools/communication/high_level/descriptors_manual_gpu.hpp @@ -46,9 +46,9 @@ namespace gridtools { \tparam DIMS the number of dimensions of the data field */ template - class empty_field_no_dt_gpu : public empty_field_base { + class empty_field_no_dt_gpu : public empty_field_base { - typedef empty_field_base base_type; + typedef empty_field_base base_type; public: /** @@ -62,7 +62,7 @@ namespace gridtools { const halo_descriptor *raw_array() const { return &(base_type::halos[0]); } protected: - template + template friend class hndlr_dynamic_ut; template @@ -82,12 +82,11 @@ namespace gridtools { #ifdef __CUDACC__ /** specialization for GPU and manual packing */ template class GridType> - class hndlr_dynamic_ut, HaloExch, proc_layout, gcl_gpu, 2> - : public descriptor_base { + class hndlr_dynamic_ut, HaloExch, proc_layout, gcl_gpu> : public descriptor_base { static const int DIMS = 3; - typedef hndlr_dynamic_ut, HaloExch, proc_layout, gcl_gpu, 2> this_type; + typedef hndlr_dynamic_ut, HaloExch, proc_layout, gcl_gpu> this_type; public: empty_field_no_dt_gpu halo; diff --git a/include/gridtools/communication/high_level/empty_field_base.hpp b/include/gridtools/communication/high_level/empty_field_base.hpp index 2bd5b072a6..c6e4eb6f73 100644 --- a/include/gridtools/communication/high_level/empty_field_base.hpp +++ b/include/gridtools/communication/high_level/empty_field_base.hpp @@ -109,8 +109,10 @@ namespace gridtools { }; } // namespace _impl - template + template class empty_field_base { + static constexpr int DIMS = 3; + typedef array HALO_t; public: diff --git a/include/gridtools/communication/high_level/helpers_impl.hpp b/include/gridtools/communication/high_level/helpers_impl.hpp index 1b10741282..0777abf6ab 100644 --- 
a/include/gridtools/communication/high_level/helpers_impl.hpp +++ b/include/gridtools/communication/high_level/helpers_impl.hpp @@ -58,9 +58,9 @@ namespace gridtools { struct allocation_service; template - struct allocation_service> { - void operator()(hndlr_descriptor_ut *hm) const { - typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; + struct allocation_service> { + void operator()(hndlr_descriptor_ut *hm) const { + typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; for (int ii = -1; ii <= 1; ++ii) for (int jj = -1; jj <= 1; ++jj) for (int kk = -1; kk <= 1; ++kk) @@ -88,14 +88,9 @@ namespace gridtools { } }; - template class GridType> - struct allocation_service, T2, procmap, arch, V>> { - void operator()(hndlr_dynamic_ut, T2, procmap, arch, V> *hm, int mf) const { + template class GridType> + struct allocation_service, T2, procmap, arch>> { + void operator()(hndlr_dynamic_ut, T2, procmap, arch> *hm, int mf) const { typedef translate_t<3, default_layout_map<3>::type> translate; typedef translate_t<3, procmap> translate_P; @@ -139,9 +134,9 @@ namespace gridtools { struct pack_service; template - struct pack_service> { - void operator()(hndlr_descriptor_ut const *hm) const { - typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; + struct pack_service> { + void operator()(hndlr_descriptor_ut const *hm) const { + typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; for (int ii = -1; ii <= 1; ++ii) for (int jj = -1; jj <= 1; ++jj) for (int kk = -1; kk <= 1; ++kk) @@ -157,9 +152,9 @@ namespace gridtools { struct unpack_service; template - struct unpack_service> { - void operator()(hndlr_descriptor_ut const *hm) const { - typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; + struct unpack_service> { + void operator()(hndlr_descriptor_ut const *hm) const { + typedef typename hndlr_descriptor_ut::pattern_type::translate_type translate; for 
(int ii = -1; ii <= 1; ++ii) for (int jj = -1; jj <= 1; ++jj) for (int kk = -1; kk <= 1; ++kk) diff --git a/include/gridtools/communication/low_level/Halo_Exchange.hpp b/include/gridtools/communication/low_level/Halo_Exchange.hpp deleted file mode 100644 index d60cabe562..0000000000 --- a/include/gridtools/communication/low_level/Halo_Exchange.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -namespace gridtools { - /* here we store the buckets data structure, first target is 2D - */ - template // ALIGN_SIZE is the size of the types used. Need to specify - // better what it is. It is needed to allow send of receive to - // other types (like MPI types) to use to send data. - // there migh be a mpi_type<8>::value to be MPI_DOUBLE and - // mpi_type<8>::divisor as the value to divide the legnth of the - // buffer in bytes to compute sizes correctly. this is just a - // proposal. - struct Halo_Exchange_2D {}; - - template - struct Halo_Exchange_3D {}; -} // namespace gridtools diff --git a/include/gridtools/communication/low_level/Halo_Exchange_2D.hpp b/include/gridtools/communication/low_level/Halo_Exchange_2D.hpp deleted file mode 100644 index 5970e4b291..0000000000 --- a/include/gridtools/communication/low_level/Halo_Exchange_2D.hpp +++ /dev/null @@ -1,555 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include "../GCL.hpp" -#include "has_communicator.hpp" -#include "translate.hpp" -#include -#include - -/** \file - * Pattern for regular cyclic and acyclic halo exchange pattern in 2D - * The communicating processes are arganized in a 2D grid. 
Given a process, neighbors processes - * are located using relative coordinates. In the next diagram, the given process is (0,0) - * while the neighbors are indicated with their relative coordinates. - * \code - * ------------------------- - * | | | | - * | -1,-1 | -1,0 | -1,1 | - * | | | | - * ------------------------- - * | | | | - * | 0,-1 | 0,0 | 0,1 | - * | | | | - * ------------------------- - * | | | | - * | 1,-1 | 1,0 | 1,1 | - * | | | | - * ------------------------- - * \endcode - */ - -/** \namespace gridtools - * All library classes, functions, and objects will reside in this namespace. - */ -namespace gridtools { - - /** \class Halo_Exchange_2D - * Class to instantiate, define and run a regular cyclic and acyclic - * halo exchange pattern in 2D. By regular it is intended that the - * amount of data sent and received during the execution of the - * pattern is known by all participants to the comunciation without - * communication. More specifically, the ampunt of data received is - * decided before the execution of the pattern. If a different - * ampunt of data is received from some process the behavior is - * undefined.\n - * Given a process (i,j), we can define \f$s_{ij}^{mn}\f$ and - * \f$r_{ij}^{mn}\f$ as the data sent and received from process - * (i,j) to/from process (i+m, j+n), respectively. For this pattern - * m and n are supposed to be in the range -1, 0, +1. \n\n When - * executing the Halo_Exchange_2D pattern, the requirement is that - * \f[r_{ij}^{mn} = s_{i+m,j+n}^{-m,-n}\f]. - * \n - * \tparam PROG_GRID Processor Grid type. An object of this type will be passed to constructor. - * \tparam ALIGN integer parameter that specify the alignment of the data to used. UNUSED IN CURRENT VERSION - * \n\n\n - * Pattern for regular cyclic and acyclic halo exchange pattern in 2D - * The communicating processes are arganized in a 2D grid. Given a process, neighbors processes - * are located using relative coordinates. 
In the next diagram, the given process is (0,0) - * while the neighbors are indicated with their relative coordinates. - * \code - * ------------------------- - * | | | | - * | -1,-1 | -1,0 | -1,1 | - * | | | | - * ------------------------- - * | | | | - * | 0,-1 | 0,0 | 0,1 | - * | | | | - * ------------------------- - * | | | | - * | 1,-1 | 1,0 | 1,1 | - * | | | | - * ------------------------- - * \endcode - * The pattern is cyclic or not bepending on the process grid passed to it. The cyclicity may be on only one - dimension. - * An example of use of the pattern is given below - \code - int iminus; - int iplus; - int jminus; - int jplus; - int iminusjminus; - int iplusjminus; - int iminusjplus; - int iplusjplus; - - int iminus_r; - int iplus_r; - int jminus_r; - int jplus_r; - int iminusjminus_r; - int iplusjminus_r; - int iminusjplus_r; - int iplusjplus_r; - - typedef gridtools::_2D_proc_grid_t grid_type; - - grid_type pg(P,my_id); - - gridtools::Halo_Exchange_2D he(pg); - - he.register_send_to_buffer<-1,-1>(&iminusjminus, sizeof(int)); - he.register_send_to_buffer<-1, 1>(&iminusjplus, sizeof(int)); - he.register_send_to_buffer< 1,-1>(&iplusjminus, sizeof(int)); - he.register_send_to_buffer< 1, 1>(&iplusjplus, sizeof(int)); - he.register_send_to_buffer<-1, 0>(&iminus, sizeof(int)); - he.register_send_to_buffer< 1, 0>(&iplus, sizeof(int)); - he.register_send_to_buffer< 0,-1>(&jminus, sizeof(int)); - he.register_send_to_buffer< 0, 1>(&jplus, sizeof(int)); - - he.register_receive_from_buffer<-1,-1>(&iminusjminus_r, sizeof(int)); - he.register_receive_from_buffer<-1, 1>(&iminusjplus_r, sizeof(int)); - he.register_receive_from_buffer< 1,-1>(&iplusjminus_r, sizeof(int)); - he.register_receive_from_buffer< 1, 1>(&iplusjplus_r, sizeof(int)); - he.register_receive_from_buffer<-1, 0>(&iminus_r, sizeof(int)); - he.register_receive_from_buffer< 1, 0>(&iplus_r, sizeof(int)); - he.register_receive_from_buffer< 0,-1>(&jminus_r, sizeof(int)); - he.register_receive_from_buffer< 
0, 1>(&jplus_r, sizeof(int)); - - he.exchange(); - \endcode - */ - template - class Halo_Exchange_2D { - - typedef translate_t<2, typename default_layout_map<2>::type> translate; - - class sr_buffers { - char *m_buffers[9]; // there is ona buffer more to allow for a simple indexing - int m_size[9]; // Sizes in bytes - - public: - explicit sr_buffers() { - m_buffers[0] = nullptr; - m_buffers[1] = nullptr; - m_buffers[2] = nullptr; - m_buffers[3] = nullptr; - m_buffers[4] = nullptr; - m_buffers[5] = nullptr; - m_buffers[6] = nullptr; - m_buffers[7] = nullptr; - m_buffers[8] = nullptr; - m_size[0] = 0; - m_size[1] = 0; - m_size[2] = 0; - m_size[3] = 0; - m_size[4] = 0; - m_size[5] = 0; - m_size[6] = 0; - m_size[7] = 0; - m_size[8] = 0; - } - - char *&buffer(int I, int J) { return m_buffers[translate()(I, J)]; } - int &size(int I, int J) { return m_size[translate()(I, J)]; } - int size(int I, int J) const { return m_size[translate()(I, J)]; } - }; - - template - struct TAG { - static const int value = (I + 1) * 3 + J + 1; - }; - - struct request_t { - MPI_Request request[9]; - MPI_Request &operator()(int i, int j) { return request[translate()(i, j)]; } - }; - - const PROC_GRID m_proc_grid; - sr_buffers m_send_buffers; - sr_buffers m_recv_buffers; - request_t request; - request_t send_request; - - template - void post_receive() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ IRECV from (" << I << "," << J << ") " - << " P " << m_proc_grid.template proc() << " - " - << " T " << TAG<-I, -J>::value << " - " - << " R " << translate()(-I, -J) << " Amount " << m_recv_buffers.size(I, J) << "\n"; -#endif - - MPI_Irecv(static_cast(m_recv_buffers.buffer(I, J)), - m_recv_buffers.size(I, J), - MPI_CHAR, - m_proc_grid.template proc(), - TAG<-I, -J>::value, - get_communicator(m_proc_grid), - &request(-I, -J)); - } - - template - void perform_isend() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ ISEND to (" << I << "," << J << ") " - << " P " << 
m_proc_grid.template proc() << " - " - << " T " << TAG::value << " - " - << " R " << translate()(I, J) << " Amount " << m_send_buffers.size(I, J) << "\n"; -#endif - MPI_Isend(static_cast(m_send_buffers.buffer(I, J)), - m_send_buffers.size(I, J), - MPI_CHAR, - m_proc_grid.template proc(), - TAG::value, - get_communicator(m_proc_grid), - &send_request(I, J)); - } - - template - void wait() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ WAIT (" << I << "," << J << ") " - << " R " << translate()(-I, -J) << "\n"; -#endif - - MPI_Status status; - MPI_Wait(&request(-I, -J), &status); - } - - public: - /** Type of the processor grid used by the pattern - */ - typedef PROC_GRID grid_type; - - /** Type of the translation map to map processors to buffers. - */ - typedef translate translate_type; - - /** Constructor that takes the process grid. Must be executed by all the processes in the grid. - * It is not possible to change the process grid once the pattern has beeninstantiated. - * - */ - explicit Halo_Exchange_2D(PROC_GRID _pg) - : m_proc_grid(_pg), m_send_buffers(), m_recv_buffers(), request(), send_request() {} - - /** Returns the processor grid (as const reference) been used in construction - - If used to get process grid information additional information can be - found in \link GRIDS_INTERACTION \endlink - */ - PROC_GRID const &proc_grid() const { return m_proc_grid; } - - /** Function to register send buffers with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. It is possible to override the - previous pointer by re-registering a new pointer with a given destination. - \param[in] p Pointer to the first element of type T to send - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. 
- \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void register_send_to_buffer(void *p, int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ " << __PRETTY_FUNCTION__ << " : " << p << " size " << s - << " I:" << I << " J:" << J << " (" << translate()(I, J) << ")\n"; -#endif - - m_send_buffers.buffer(I, J) = reinterpret_cast(p); - m_send_buffers.size(I, J) = s; - } - - /** Function to register send buffers with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. It is possible to override the - previous pointer by re-registering a new pointer with a given destination. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] p Pointer to the first element of type T to send - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. - */ - template - void register_send_to_buffer(void *p, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - register_send_to_buffer(p, s, I, J); - } - - /** Function to register buffers for received data with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - received from that process. The amount of data is specified as number of bytes. It is possible to override - the previous pointer by re-registering a new pointer with a given source. 
- \param[in] p Pointer to the first element of type T where to put received data - \param[in] s Number of bytes (not number of elements) expected to be received. This is the data that is - assumed to arrive. If less data arrives, the behaviour is undefined. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void register_receive_from_buffer(void *p, int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ " << __PRETTY_FUNCTION__ << " : " << p << " size " << s - << " I:" << I << " J:" << J << " (" << translate()(I, J) << ")\n"; -#endif - - m_recv_buffers.buffer(I, J) = reinterpret_cast(p); - m_recv_buffers.size(I, J) = s; - } - - /** Function to register buffers for received data with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - received from that process. The amount of data is specified as number of bytes. It is possible to override - the previous pointer by re-registering a new pointer with a given source. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] p Pointer to the first element of type T where to put received data - \param[in] s Number of bytes (not number of elements) expected to be received. This is the data that is - assumed to arrive. If less data arrives, the behaviour is undefined. 
- */ - template - void register_receive_from_buffer(void *p, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - register_receive_from_buffer(p, s, I, J); - } - - /* Setting sizes */ - - /** Function to set send buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to send data. It is possible to override the previous pointer - by re-registering a new pointer with a given destination. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \param[in] s Number of bytes (not number of elements) to be sent. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void set_send_to_size(int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - - m_send_buffers.size(I, J) = s; - } - - /** Function to set send buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to send data. It is possible to override the previous pointer - by re-registering a new pointer with a given destination. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] s Number of bytes (not number of elements) to be sent. 
- */ - template - void set_send_to_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - set_send_to_size(s, I, J); - } - - /** Function to set receive buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to receive data. It is possible to override the previous - pointer by re-registering a new pointer with a given source. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \param[in] s Number of bytes (not number of elements) to be packed. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void set_receive_from_size(int s, int I, int J) const { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - - m_send_buffers.size(I, J) = s; - } - - /** Function to set receive buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to receive data. It is possible to override the previous - pointer by re-registering a new pointer with a given source. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] s Number of bytes (not number of elements) to be packed. 
- */ - template - void set_receive_from_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - set_receive_from_size(s, I, J); - } - - /** Retrieve the size of the buffer containing data to be sent to neighbor I, J. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - */ - int send_size(int I, int J) const { return m_send_buffers.size(I, J); } - - /** Retrieve the size of the buffer containing data to be received from neighbor I, J. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - */ - int recv_size(int I, int J) const { return m_recv_buffers.size(I, J); } - - /** When called this function executes the communication pattern, that is, send all the send-buffers to the - * correspondinf receive-buffers. When the function returns the data in receive buffers can be safely accessed. 
- */ - void exchange() { - - // cout << GSL_pid() << " proc coords: " << r << " " << c << endl; - /* NORTH/IMINUS - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - | | | | | | | | | | | | | | | | |------- | | -------| - | | |---------| | | | | | | | r0 | | r>0 | | | | r>0 | - WEST | | | r>0 | | | | | | c>0 | | | | | | | | c0 | - EAST - JMINUS|---------| | | | | | | | | | | | | | | | | | | | |JPLUS - | | | | | | | | | | |------- | | -------| | | | | | | - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - SOUTH/IPLUS - */ - - /* Posting receives - */ - if (m_proc_grid.template proc<1, 0>() != -1) { - post_receive<1, 0>(); - } - - if (m_proc_grid.template proc<-1, 0>() != -1) { - post_receive<-1, 0>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - post_receive<0, 1>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - post_receive<0, -1>(); - } - - /* Posting receives FOR CORNERS - */ - if (m_proc_grid.template proc<1, 1>() != -1) { - post_receive<1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1>() != -1) { - post_receive<-1, -1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - post_receive<1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - post_receive<-1, 1>(); - } - - // UNCOMMENT THIS IF A DEADLOCK APPEARS BECAUSE SENDS HAS TO FOLLOW RECEIVES (TRUE IN SOME PLATFORMS) - // MPI_Barrier(GCL_WORLD); - - /* Sending data - */ - if (m_proc_grid.template proc<-1, 0>() != -1) { - perform_isend<-1, 0>(); - } - - if (m_proc_grid.template proc<1, 0>() != -1) { - perform_isend<1, 0>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - perform_isend<0, -1>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - perform_isend<0, 1>(); - } - - /* Sending data CORNERS - */ - if (m_proc_grid.template proc<-1, -1>() != -1) { - perform_isend<-1, -1>(); - } - - if (m_proc_grid.template proc<1, 1>() != -1) { - 
perform_isend<1, 1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - perform_isend<1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - perform_isend<-1, 1>(); - } - - /* Actual receives - */ - if (m_proc_grid.template proc<1, 0>() != -1) { - wait<1, 0>(); - } - - if (m_proc_grid.template proc<-1, 0>() != -1) { - wait<-1, 0>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - wait<0, 1>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - wait<0, -1>(); - } - - if (m_proc_grid.template proc<1, 1>() != -1) { - wait<1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1>() != -1) { - wait<-1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - wait<-1, 1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - wait<1, -1>(); - } - } - }; - -} // namespace gridtools diff --git a/include/gridtools/communication/low_level/Halo_Exchange_2D_DT.hpp b/include/gridtools/communication/low_level/Halo_Exchange_2D_DT.hpp deleted file mode 100644 index d59576a52f..0000000000 --- a/include/gridtools/communication/low_level/Halo_Exchange_2D_DT.hpp +++ /dev/null @@ -1,562 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include "../GCL.hpp" -#include "translate.hpp" -#include - -/** \file - * Pattern for regular cyclic and acyclic halo exchange pattern in 2D - * The communicating processes are arganized in a 2D grid. Given a process, neighbors processes - * are located using relative coordinates. In the next diagram, the given process is (0,0) - * while the neighbors are indicated with their relative coordinates. 
- * \code - * ------------------------- - * | | | | - * | -1,-1 | -1,0 | -1,1 | - * | | | | - * ------------------------- - * | | | | - * | 0,-1 | 0,0 | 0,1 | - * | | | | - * ------------------------- - * | | | | - * | 1,-1 | 1,0 | 1,1 | - * | | | | - * ------------------------- - * \endcode - */ - -/** \namespace gridtools - * All library classes, functions, and objects will reside in this namespace. - */ -namespace gridtools { - - /** \class Halo_Exchange_2D - * Class to instantiate, define and run a regular cyclic and acyclic - * halo exchange pattern in 2D. By regular it is intended that the - * amount of data sent and received during the execution of the - * pattern is known by all participants to the comunciation without - * communication. More specifically, the ampunt of data received is - * decided before the execution of the pattern. If a different - * ampunt of data is received from some process the behavior is - * undefined.\n - * Given a process (i,j), we can define \f$s_{ij}^{mn}\f$ and - * \f$r_{ij}^{mn}\f$ as the data sent and received from process - * (i,j) to/from process (i+m, j+n), respectively. For this pattern - * m and n are supposed to be in the range -1, 0, +1. \n\n When - * executing the Halo_Exchange_2D pattern, the requirement is that - * \f[r_{ij}^{mn} = s_{i+m,j+n}^{-m,-n}\f]. - * \n - * \tparam PROG_GRID Processor Grid type. An object of this type will be passed to constructor. - * \tparam ALIGN integer parameter that specify the alignment of the data to used. UNUSED IN CURRENT VERSION - * \n\n\n - * Pattern for regular cyclic and acyclic halo exchange pattern in 2D - * The communicating processes are arganized in a 2D grid. Given a process, neighbors processes - * are located using relative coordinates. In the next diagram, the given process is (0,0) - * while the neighbors are indicated with their relative coordinates. 
- * \code - * ------------------------- - * | | | | - * | -1,-1 | -1,0 | -1,1 | - * | | | | - * ------------------------- - * | | | | - * | 0,-1 | 0,0 | 0,1 | - * | | | | - * ------------------------- - * | | | | - * | 1,-1 | 1,0 | 1,1 | - * | | | | - * ------------------------- - * \endcode - * The pattern is cyclic or not bepending on the process grid passed to it. The cyclicity may be on only one - dimension. - * An example of use of the pattern is given below - \code - int iminus; - int iplus; - int jminus; - int jplus; - int iminusjminus; - int iplusjminus; - int iminusjplus; - int iplusjplus; - - int iminus_r; - int iplus_r; - int jminus_r; - int jplus_r; - int iminusjminus_r; - int iplusjminus_r; - int iminusjplus_r; - int iplusjplus_r; - - typedef gridtools::_2D_proc_grid_t grid_type; - - grid_type pg(P,my_id); - - gridtools::Halo_Exchange_2D he(pg); - - he.register_send_to_buffer<-1,-1>(&iminusjminus, sizeof(int)); - he.register_send_to_buffer<-1, 1>(&iminusjplus, sizeof(int)); - he.register_send_to_buffer< 1,-1>(&iplusjminus, sizeof(int)); - he.register_send_to_buffer< 1, 1>(&iplusjplus, sizeof(int)); - he.register_send_to_buffer<-1, 0>(&iminus, sizeof(int)); - he.register_send_to_buffer< 1, 0>(&iplus, sizeof(int)); - he.register_send_to_buffer< 0,-1>(&jminus, sizeof(int)); - he.register_send_to_buffer< 0, 1>(&jplus, sizeof(int)); - - he.register_receive_from_buffer<-1,-1>(&iminusjminus_r, sizeof(int)); - he.register_receive_from_buffer<-1, 1>(&iminusjplus_r, sizeof(int)); - he.register_receive_from_buffer< 1,-1>(&iplusjminus_r, sizeof(int)); - he.register_receive_from_buffer< 1, 1>(&iplusjplus_r, sizeof(int)); - he.register_receive_from_buffer<-1, 0>(&iminus_r, sizeof(int)); - he.register_receive_from_buffer< 1, 0>(&iplus_r, sizeof(int)); - he.register_receive_from_buffer< 0,-1>(&jminus_r, sizeof(int)); - he.register_receive_from_buffer< 0, 1>(&jplus_r, sizeof(int)); - - he.exchange(); - \endcode - */ - template - class Halo_Exchange_2D_DT { - - typedef 
translate_t<2, typename default_layout_map<2>::type> translate; - - class sr_buffers { - char *m_buffers[9]; // there is ona buffer more to allow for a simple indexing - MPI_Datatype m_datatype[9]; // there is ona buffer more to allow for a simple indexing - int m_size[9]; // Sizes in bytes - - public: - explicit sr_buffers() { - m_buffers[0] = nullptr; - m_buffers[1] = nullptr; - m_buffers[2] = nullptr; - m_buffers[3] = nullptr; - m_buffers[4] = nullptr; - m_buffers[5] = nullptr; - m_buffers[6] = nullptr; - m_buffers[7] = nullptr; - m_buffers[8] = nullptr; - m_size[0] = 0; - m_size[1] = 0; - m_size[2] = 0; - m_size[3] = 0; - m_size[4] = 0; - m_size[5] = 0; - m_size[6] = 0; - m_size[7] = 0; - m_size[8] = 0; - } - - char *&buffer(int I, int J) { return m_buffers[translate()(I, J)]; } - MPI_Datatype &datatype(int I, int J) { return m_datatype[translate()(I, J)]; } - int &size(int I, int J) { return m_size[translate()(I, J)]; } - }; - - template - struct TAG { - static const int value = (I + 1) * 3 + J + 1; - }; - - struct request_t { - MPI_Request request[9]; - MPI_Request &operator()(int i, int j) { return request[translate()(i, j)]; } - }; - - const PROC_GRID m_proc_grid; - - sr_buffers m_send_buffers; - sr_buffers m_recv_buffers; - request_t request; - request_t send_request; - - template - void post_receive() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ IRECV from (" << I << "," << J << ") " - << " P " << m_proc_grid.template proc() << " - " - << " T " << TAG<-I, -J>::value << " - " - << " R " << translate()(-I, -J) << "\n"; -#endif - - MPI_Irecv(static_cast(m_recv_buffers.buffer(I, J)), - m_recv_buffers.size(I, J), - m_recv_buffers.datatype(I, J), - m_proc_grid.template proc(), - TAG<-I, -J>::value, - gridtools::GCL_WORLD, // FIXME: m_proc_grid.communicator(); - &request(-I, -J)); - } - - template - void perform_isend() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ ISEND to (" << I << "," << J << ") " - << " P " << 
m_proc_grid.template proc() << " - " - << " T " << TAG::value << " - " - << " R " << translate()(I, J) << "\n"; -#endif - MPI_Isend(static_cast(m_send_buffers.buffer(I, J)), - m_send_buffers.size(I, J), - m_send_buffers.datatype(I, J), - m_proc_grid.template proc(), - TAG::value, - gridtools::GCL_WORLD, - &send_request(I, J)); - } - - template - void wait() { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ WAIT (" << I << "," << J << ") " - << " R " << translate()(-I, -J) << "\n"; -#endif - - MPI_Status status; - MPI_Wait(&request(-I, -J), &status); - } - - public: - /** Type of the processor grid used by the pattern - */ - typedef PROC_GRID grid_type; - - /** Type of the translation map to map processors to buffers. - */ - typedef translate translate_type; - - /** Constructor that takes the process grid. Must be executed by all the processes in the grid. - * It is not possible to change the process grid once the pattern has beeninstantiated. - * - */ - explicit Halo_Exchange_2D_DT(PROC_GRID _pg) - : m_proc_grid(_pg), m_send_buffers(), m_recv_buffers(), request(), send_request() {} - - /** Returns the processor grid (as const reference) been used in construction - - If used to get process grid information additional information can be - found in \link GRIDS_INTERACTION \endlink - */ - PROC_GRID const &proc_grid() const { return m_proc_grid; } - - /** Function to register send buffers with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. It is possible to override the - previous pointer by re-registering a new pointer with a given destination. - \param[in] p Pointer to the first element of type T to send - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. 
- \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void register_send_to_buffer(void *p, MPI_Datatype const &DT, int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ " << __PRETTY_FUNCTION__ << " : " << p << " size " << s - << " I:" << I << " J:" << J << " (" << translate()(I, J) << ")\n"; -#endif - - m_send_buffers.buffer(I, J) = reinterpret_cast(p); - m_send_buffers.datatype(I, J) = DT; - m_send_buffers.size(I, J) = s; - } - - /** Function to register send buffers with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. It is possible to override the - previous pointer by re-registering a new pointer with a given destination. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] p Pointer to the first element of type T to send - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. - */ - template - void register_send_to_buffer(void *p, MPI_Datatype const &DT, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - register_send_to_buffer(p, DT, s, I, J); - } - - /** Function to register buffers for received data with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - received from that process. The amount of data is specified as number of bytes. 
It is possible to override - the previous pointer by re-registering a new pointer with a given source. - \param[in] p Pointer to the first element of type T where to put received data - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) expected to be received. This is the data that is - assumed to arrive. If less data arrives, the behaviour is undefined. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void register_receive_from_buffer(void *p, MPI_Datatype const &DT, int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ " << __PRETTY_FUNCTION__ << " : " << p << " size " << s - << " I:" << I << " J:" << J << " (" << translate()(I, J) << ")\n"; -#endif - - m_recv_buffers.buffer(I, J) = reinterpret_cast(p); - m_recv_buffers.datatype(I, J) = DT; - m_recv_buffers.size(I, J) = s; - } - - /** Function to register buffers for received data with the communication patter. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - received from that process. The amount of data is specified as number of bytes. It is possible to override - the previous pointer by re-registering a new pointer with a given source. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] p Pointer to the first element of type T where to put received data - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) expected to be received. This is the data that is - assumed to arrive. If less data arrives, the behaviour is undefined. 
- */ - template - void register_receive_from_buffer(void *p, MPI_Datatype const &DT, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - register_receive_from_buffer(p, DT, s, I, J); - } - - /* Setting sizes */ - - /** Function to set send buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to send data. It is possible to override the previous pointer - by re-registering a new pointer with a given destination. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \param[in] s Number of bytes (not number of elements) to be sent. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void set_send_to_size(int s, int I, int J) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - - m_send_buffers.size(I, J) = s; - } - - /** Function to set send buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to send data. It is possible to override the previous pointer - by re-registering a new pointer with a given destination. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] s Number of bytes (not number of elements) to be sent. 
- */ - template - void set_send_to_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - set_send_to_size(s, I, J); - } - - /** Function to set receive buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to receive data. It is possible to override the previous - pointer by re-registering a new pointer with a given source. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \param[in] s Number of bytes (not number of elements) to be packed. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - */ - void set_receive_from_size(int s, int I, int J) const { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - - m_send_buffers.size(I, J) = s; - } - - /** Function to set receive buffers sizes if the size must be updated from a previous registration. The same - pointer passed during registration will be used to receive data. It is possible to override the previous - pointer by re-registering a new pointer with a given source. - Values I and J are coordinates relative to calling process and the buffer is the container for the data to be - sent to that process. The amount of data is specified as number of bytes. - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \param[in] s Number of bytes (not number of elements) to be packed. 
- */ - template - void set_receive_from_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - - set_receive_from_size(s, I, J); - } - - void post_receives() { - /* Posting receives - */ - if (m_proc_grid.template proc<1, 0>() != -1) { - post_receive<1, 0>(); - } - - if (m_proc_grid.template proc<-1, 0>() != -1) { - post_receive<-1, 0>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - post_receive<0, 1>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - post_receive<0, -1>(); - } - - /* Posting receives FOR CORNERS - */ - if (m_proc_grid.template proc<1, 1>() != -1) { - post_receive<1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1>() != -1) { - post_receive<-1, -1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - post_receive<1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - post_receive<-1, 1>(); - } - } - - void do_sends() { - /* Sending data - */ - if (m_proc_grid.template proc<-1, 0>() != -1) { - perform_isend<-1, 0>(); - } - - if (m_proc_grid.template proc<1, 0>() != -1) { - perform_isend<1, 0>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - perform_isend<0, -1>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - perform_isend<0, 1>(); - } - - /* Sending data CORNERS - */ - if (m_proc_grid.template proc<-1, -1>() != -1) { - perform_isend<-1, -1>(); - } - - if (m_proc_grid.template proc<1, 1>() != -1) { - perform_isend<1, 1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - perform_isend<1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - perform_isend<-1, 1>(); - } - } - - /** When called this function executes the communication pattern, that is, send all the send-buffers to the - * correspondinf receive-buffers. When the function returns the data in receive buffers can be safely accessed. 
- */ - void exchange() { - start_exchange(); - wait(); - } - - void start_exchange() { - // cout << GSL_pid() << " proc coords: " << r << " " << c << endl; - /* NORTH/IMINUS - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - | | | | | | | | | | | | | | | | |------- | | -------| - | | |---------| | | | | | | | r0 | | r>0 | | | | r>0 | - WEST | | | r>0 | | | | | | c>0 | | | | | | | | c0 | EAST - JMINUS|---------| | | | | | | | | | | | | | | | | | | | |JPLUS - | | | | | | | | | | |------- | | -------| | | | | | | - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - SOUTH/IPLUS - */ - - post_receives(); - - // UNCOMMENT THIS IF A DEADLOCK APPEARS BECAUSE SENDS HAS TO FOLLOW RECEIVES (TRUE IN SOME PLATFORMS) - // MPI_Barrier(GCL_WORLD); - - do_sends(); - } - - void wait() { - /* Actual receives - */ - if (m_proc_grid.template proc<1, 0>() != -1) { - wait<1, 0>(); - } - - if (m_proc_grid.template proc<-1, 0>() != -1) { - wait<-1, 0>(); - } - - if (m_proc_grid.template proc<0, 1>() != -1) { - wait<0, 1>(); - } - - if (m_proc_grid.template proc<0, -1>() != -1) { - wait<0, -1>(); - } - - if (m_proc_grid.template proc<1, 1>() != -1) { - wait<1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1>() != -1) { - wait<-1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1>() != -1) { - wait<-1, 1>(); - } - - if (m_proc_grid.template proc<1, -1>() != -1) { - wait<1, -1>(); - } - - MPI_Barrier(gridtools::GCL_WORLD); - } - }; - -} // namespace gridtools diff --git a/include/gridtools/communication/low_level/Halo_Exchange_3D.hpp b/include/gridtools/communication/low_level/Halo_Exchange_3D.hpp index 74071fc431..44ae398148 100644 --- a/include/gridtools/communication/low_level/Halo_Exchange_3D.hpp +++ b/include/gridtools/communication/low_level/Halo_Exchange_3D.hpp @@ -16,7 +16,6 @@ #include "../../common/gt_assert.hpp" #include "../GCL.hpp" #include "has_communicator.hpp" -#include 
"helper.hpp" #include "translate.hpp" /** \file @@ -245,10 +244,7 @@ namespace gridtools { sr_buffers m_send_buffers; sr_buffers m_recv_buffers; -#if defined(GCL_HOSTWORKAROUND) - sr_buffers m_host_send_buffers; - sr_buffers m_host_recv_buffers; -#endif + request_t request; request_t_mark send_request; @@ -269,17 +265,6 @@ namespace gridtools { double begin_time = MPI_Wtime(); #endif -#ifdef GCL_HOSTWORKAROUND - // using host workaround on gpu - // post receive to the page-locked buffer on the host - MPI_Irecv(static_cast(m_host_recv_buffers.buffer(I, J, K)), - m_recv_buffers.size(I, J, K), - MPI_CHAR, - m_proc_grid.template proc(), - TAG<-I, -J, -K>::value, - get_communicator(m_proc_grid), - &request(-I, -J, -K)); -#else MPI_Irecv(static_cast(m_recv_buffers.buffer(I, J, K)), m_recv_buffers.size(I, J, K), MPI_CHAR, @@ -287,7 +272,6 @@ namespace gridtools { TAG<-I, -J, -K>::value, get_communicator(m_proc_grid), &request(-I, -J, -K)); -#endif #ifdef GCL_TRACE double end_time = MPI_Wtime(); stats_collector_3D.add_event(CommEvent(ce_receive, @@ -316,23 +300,6 @@ namespace gridtools { double begin_time = MPI_Wtime(); #endif -#ifdef GCL_HOSTWORKAROUND - // using host workaround on gpu - // copy data from device to host - GT_CUDA_CHECK(cudaMemcpy(static_cast(m_host_send_buffers.buffer(I, J, K)), - static_cast(m_send_buffers.buffer(I, J, K)), - m_host_send_buffers.size(I, J, K), - cudaMemcpyDeviceToHost)); - - // perform send from host buffer - MPI_Isend(static_cast(m_host_send_buffers.buffer(I, J, K)), - m_send_buffers.size(I, J, K), - MPI_CHAR, - m_proc_grid.template proc(), - TAG::value, - get_communicator(m_proc_grid), - &send_request(I, J, K)); -#else MPI_Isend(static_cast(m_send_buffers.buffer(I, J, K)), m_send_buffers.size(I, J, K), MPI_CHAR, @@ -340,7 +307,7 @@ namespace gridtools { TAG::value, get_communicator(m_proc_grid), &send_request(I, J, K)); -#endif + send_request.set(I, J, K); #ifdef GCL_TRACE double end_time = MPI_Wtime(); @@ -395,14 +362,6 @@ namespace 
gridtools { MPI_Status status; MPI_Wait(&request(-I, -J, -K), &status); -#ifdef GCL_HOSTWORKAROUND - // copy from host buffers to device - // only need to do this if receiving from another PID - GT_CUDA_CHECK(cudaMemcpy(static_cast(m_recv_buffers.buffer(I, J, K)), - static_cast(m_host_recv_buffers.buffer(I, J, K)), - m_host_recv_buffers.size(I, J, K), - cudaMemcpyHostToDevice)); -#endif #ifdef GCL_TRACE double end_time = MPI_Wtime(); stats_collector_3D.add_event(CommEvent(ce_receive_wait, @@ -486,11 +445,6 @@ namespace gridtools { m_send_buffers.buffer(I, J, K) = reinterpret_cast(p); m_send_buffers.size(I, J, K) = s; -#ifdef GCL_HOSTWORKAROUND - // allocate a buffer on the host with page-locked memory - m_host_send_buffers.buffer(I, J, K) = _impl::helper_alloc::alloc(s); - m_host_send_buffers.size(I, J, K) = s; -#endif } /** Function to register send buffers with the communication patter. @@ -554,11 +508,6 @@ namespace gridtools { m_recv_buffers.buffer(I, J, K) = reinterpret_cast(p); m_recv_buffers.size(I, J, K) = s; -#ifdef GCL_HOSTWORKAROUND - // allocate a buffer on the host with page-locked memory - m_host_recv_buffers.buffer(I, J, K) = _impl::helper_alloc::alloc(s); - m_host_recv_buffers.size(I, J, K) = s; -#endif } /** Function to register buffers for received data with the communication patter. @@ -689,11 +638,6 @@ namespace gridtools { BOOST_MPL_ASSERT_RELATION(K, <=, 1); set_receive_from_size(s, I, J, K); -#ifdef GCL_HOSTWORKAROUND - // throw an assertion because the page-locked buffer allocated in the workaround - // has fixed size (if this is a problem we can free, then reallocate memory) - assert(false); -#endif } /** Retrieve the size of the buffer containing data to be sent to neighbor I, J, K. 
diff --git a/include/gridtools/communication/low_level/Halo_Exchange_3D_DT.hpp b/include/gridtools/communication/low_level/Halo_Exchange_3D_DT.hpp deleted file mode 100644 index 2e47bf62c5..0000000000 --- a/include/gridtools/communication/low_level/Halo_Exchange_3D_DT.hpp +++ /dev/null @@ -1,956 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. - * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -#include "../../common/gt_assert.hpp" -#include "../GCL.hpp" -#include "has_communicator.hpp" -#include "translate.hpp" - -/** \file - * Pattern for regular cyclic and acyclic halo exchange pattern in 3D - * The communicating processes are arganized in a 3D grid. Given a process, neighbors processes - * are located using relative coordinates. In the next diagram, the given process is (0,0,0) - * while the neighbors are indicated with their relative coordinates. - * \code - * ---------------------------------- - * | | | | - * | -1,-1,-1 | -1,0,-1 | -1,1,-1 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1,-1 | 0,0,-1 | 0,1,-1 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1,-1 | 1,0,-1 | 1,1,-1 | - * | | | | - * ---------------------------------- - * - * ---------------------------------- - * | | | | - * | -1,-1, 0 | -1,0, 0 | -1,1, 0 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1, 0 | 0,0, 0 | 0,1, 0 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1, 0 | 1,0, 0 | 1,1, 0 | - * | | | | - * ---------------------------------- - * - * ---------------------------------- - * | | | | - * | -1,-1, 1 | -1,0, 1 | -1,1, 1 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1, 1 | 0,0, 1 | 0,1, 1 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1, 1 | 1,0, 1 | 1,1, 1 | - * | | | | - * 
---------------------------------- - * \endcode - */ - -namespace gridtools { - - /** \class Halo_Exchange_3D - * Class to instantiate, define and run a regular cyclic and acyclic - * halo exchange pattern in 3D. By regular it is intended that the - * amount of data sent and received during the execution of the - * pattern is known by all participants to the comunciation without - * communication. More specifically, the ampunt of data received is - * decided before the execution of the pattern. If a different - * ampunt of data is received from some process the behavior is - * undefined.\n - * Given a process (i,j,k), we can define \f$s_{ijk}^{mnl}\f$ and - * \f$r_{ijk}^{mnl}\f$ as the data sent and received from process - * (i,j,k) to/from process (i+m, j+n, k+l), respectively. For this pattern - * m, n and l are supposed to be in the range -1, 0, +1. \n\n When - * executing the Halo_Exchange_3D pattern, the requirement is that - * \f[r_{ijk}^{mnl} = s_{i+m,j+n,k+l}^{-m,-n,-l}\f]. - * \n - * \tparam PROG_GRID Processor Grid type. An object of this type will be passed to constructor. - * \tparam ALIGN integer parameter that specify the alignment of the data to used. UNUSED IN CURRENT VERSION - * \n\n\n - * Pattern for regular cyclic and acyclic halo exchange pattern in 3D - * The communicating processes are arganized in a 3D grid. Given a process, neighbors processes - * are located using relative coordinates. In the next diagram, the given process is (0,0,0) - * while the neighbors are indicated with their relative coordinates. 
- * \code - * ---------------------------------- - * | | | | - * | -1,-1,-1 | -1,0,-1 | -1,1,-1 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1,-1 | 0,0,-1 | 0,1,-1 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1,-1 | 1,0,-1 | 1,1,-1 | - * | | | | - * ---------------------------------- - * - * ---------------------------------- - * | | | | - * | -1,-1, 0 | -1,0, 0 | -1,1, 0 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1, 0 | 0,0, 0 | 0,1, 0 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1, 0 | 1,0, 0 | 1,1, 0 | - * | | | | - * ---------------------------------- - * - * ---------------------------------- - * | | | | - * | -1,-1, 1 | -1,0, 1 | -1,1, 1 | - * | | | | - * ---------------------------------- - * | | | | - * | 0,-1, 1 | 0,0, 1 | 0,1, 1 | - * | | | | - * ---------------------------------- - * | | | | - * | 1,-1, 1 | 1,0, 1 | 1,1, 1 | - * | | | | - * ---------------------------------- - * \endcode - * The pattern is cyclic or not bepending on the process grid passed - * to it. The cyclicity may be on only one dimension. - * An example of use of the pattern is given below - \code - OUT CODE HERE AS IN 2D CASE - \endcode - - A running example can be found in the included example. 
\ example Halo_Exchange_test_3D.cpp - */ - template - class Halo_Exchange_3D_DT { - - typedef translate_t<3, typename default_layout_map<3>::type> translate; - - class sr_buffers { - char *m_buffers[27]; // there is ona buffer more to allow for a simple indexing - MPI_Datatype m_datatype[27]; // there is ona buffer more to allow for a simple indexing - int m_size[27]; // Sizes in bytes - public: - explicit sr_buffers() { - m_buffers[0] = nullptr; - m_buffers[1] = nullptr; - m_buffers[2] = nullptr; - m_buffers[3] = nullptr; - m_buffers[4] = nullptr; - m_buffers[5] = nullptr; - m_buffers[6] = nullptr; - m_buffers[7] = nullptr; - m_buffers[8] = nullptr; - m_buffers[9] = nullptr; - m_buffers[10] = nullptr; - m_buffers[11] = nullptr; - m_buffers[12] = nullptr; - m_buffers[13] = nullptr; - m_buffers[14] = nullptr; - m_buffers[15] = nullptr; - m_buffers[16] = nullptr; - m_buffers[17] = nullptr; - m_buffers[18] = nullptr; - m_buffers[19] = nullptr; - m_buffers[20] = nullptr; - m_buffers[21] = nullptr; - m_buffers[22] = nullptr; - m_buffers[23] = nullptr; - m_buffers[24] = nullptr; - m_buffers[25] = nullptr; - m_buffers[26] = nullptr; - - m_size[0] = 0; - m_size[1] = 0; - m_size[2] = 0; - m_size[3] = 0; - m_size[4] = 0; - m_size[5] = 0; - m_size[6] = 0; - m_size[7] = 0; - m_size[8] = 0; - m_size[9] = 0; - m_size[10] = 0; - m_size[11] = 0; - m_size[12] = 0; - m_size[13] = 0; - m_size[14] = 0; - m_size[15] = 0; - m_size[16] = 0; - m_size[17] = 0; - m_size[18] = 0; - m_size[19] = 0; - m_size[20] = 0; - m_size[21] = 0; - m_size[22] = 0; - m_size[23] = 0; - m_size[24] = 0; - m_size[25] = 0; - m_size[26] = 0; - } - - char *&buffer(int I, int J, int K) { return m_buffers[translate()(I, J, K)]; } - MPI_Datatype &datatype(int I, int J, int K) { return m_datatype[translate()(I, J, K)]; } - int &size(int I, int J, int K) { return m_size[translate()(I, J, K)]; } - int size(int I, int J, int K) const { return m_size[translate()(I, J, K)]; } - }; - - template - struct TAG { - static 
const int value = (K + 1) * 9 + (I + 1) * 3 + J + 1; - }; - - struct request_t { - MPI_Request request[27]; - MPI_Request &operator()(int i, int j, int k) { return request[translate()(i, j, k)]; } - }; - - const PROC_GRID m_proc_grid; - - sr_buffers m_send_buffers; - sr_buffers m_recv_buffers; - request_t request; - request_t send_request; - - template - void post_receive() { - if (m_recv_buffers.size(I, J, K)) { -#ifndef NDEBUG - int ss2; - MPI_Pack_size(1, m_recv_buffers.datatype(I, J, K), gridtools::GCL_WORLD, &ss2); - std::cout << "@" << gridtools::PID << "@ IRECV (" << I << "," << J << "," << K << ") " - << " P " << m_proc_grid.template proc() << " - " - << " T " << TAG<-I, -J, -K>::value << " - " - << " R " << translate()(-I, -J, -K) << " - " - << " Amount " << ss2 << "\n"; -#endif - MPI_Irecv(static_cast(m_recv_buffers.buffer(I, J, K)), - m_recv_buffers.size(I, J, K), - m_recv_buffers.datatype(I, J, K), - m_proc_grid.template proc(), - TAG<-I, -J, -K>::value, - get_communicator(m_proc_grid), - &request(-I, -J, -K)); - } - } - - template - void perform_isend() { - - if (m_send_buffers.size(I, J, K)) { -#ifndef NDEBUG - int ss2; - MPI_Pack_size(1, m_send_buffers.datatype(I, J, K), gridtools::GCL_WORLD, &ss2); - std::cout << "@" << gridtools::PID << "@ ISEND (" << I << "," << J << "," << K << ") " - << " P " << m_proc_grid.template proc() << " - " - << " T " << TAG::value << " - " - << " R " << translate()(I, J, K) << " - " - << " Amount " << ss2 << "\n"; -#endif - MPI_Isend(static_cast(m_send_buffers.buffer(I, J, K)), - m_send_buffers.size(I, J, K), - m_send_buffers.datatype(I, J, K), - m_proc_grid.template proc(), - TAG::value, - get_communicator(m_proc_grid), - &send_request(I, J, K)); - } - } - - template - void wait() { - if (m_recv_buffers.size(I, J, K)) { -#ifndef NDEBUG - std::cout << "@" << gridtools::PID << "@ WAIT (" << I << "," << J << "," << K << ") " - << " R " << translate()(-I, -J, -K) << "\n"; -#endif - - MPI_Status status; - 
MPI_Wait(&request(-I, -J, -K), &status); - } - } - - public: - /** Type of the processor grid used by the pattern - */ - typedef PROC_GRID grid_type; - - /** Type of the translation map to map processors to buffers. - */ - typedef translate translate_type; - - /** Constructor that takes the process grid. Must be executed by all the processes in the grid. - * It is not possible to change the process grid once the pattern has beeninstantiated. - * - */ - explicit Halo_Exchange_3D_DT(PROC_GRID _pg) - : m_proc_grid(_pg), m_send_buffers(), m_recv_buffers(), request(), send_request() {} - - /** Function to retrieve the grid from the pattern, from which user can query - location information. - - If used to get process grid information additional information can be - found in \link GRIDS_INTERACTION \endlink - */ - PROC_GRID const &proc_grid() const { return m_proc_grid; } - - /** Function to register send buffers with the communication patter. - - Values I and J are coordinates relative to calling process and - the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. It - is possible to override the previous pointer by re-registering a - new pointer with a given destination. - - \param[in] p Pointer to the first element of type T to send - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. 
- \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - \param[in] K Relative coordinates of the receiving process along the third dimension - */ - void register_send_to_buffer(void *p, MPI_Datatype const &DT, int s, int I, int J, int K) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - assert((K >= -1 && K <= 1)); - - // #ifndef NDEBUG - // std::cout << "@" << gridtools::PID - // << "@ " << __PRETTY_FUNCTION__ - // << " : " << p << " size " << s - // << " I:" << I - // << " J:" << J - // << " K:" << K - // << " (" << translate()(I,J,K) << ")\n"; - // #endif - - m_send_buffers.buffer(I, J, K) = reinterpret_cast(p); - m_send_buffers.datatype(I, J, K) = DT; - m_send_buffers.size(I, J, K) = s; - } - - /** Function to register send buffers with the communication patter. - - Values I, J and K are coordinates relative to calling process - and the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. It - is possible to override the previous pointer by re-registering - a new pointer with a given destination. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - \param[in] p Pointer to the first element of type T to send - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) to be send. In any case this is the amount of data - sent. 
- */ - template - void register_send_to_buffer(void *p, MPI_Datatype const &DT, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - BOOST_MPL_ASSERT_RELATION(K, >=, -1); - BOOST_MPL_ASSERT_RELATION(K, <=, 1); - - register_send_to_buffer(p, DT, s, I, J, K); - } - - /** Function to register buffers for received data with the communication patter. - - Values I, J and K are coordinates relative to calling process and - the buffer is the container for the data to be received from - that process. The amount of data is specified as number of - bytes. It is possible to override the previous pointer by - re-registering a new pointer with a given source. - - \param[in] p Pointer to the first element of type T where to put received data - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) expected - to be received. This is the data that is assumed to arrive. If - less data arrives, the behaviour is undefined. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - \param[in] K Relative coordinates of the receiving process along the third dimension - */ - void register_receive_from_buffer(void *p, MPI_Datatype const &DT, int s, int I, int J, int K) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - assert((K >= -1 && K <= 1)); - - // #ifndef NDEBUG - // std::cout << "@" << gridtools::PID - // << "@ " << __PRETTY_FUNCTION__ - // << " : " << p << " size " << s - // << " I:" << I - // << " J:" << J - // << " K:" << K - // << " (" << translate()(I,J,K) << ")\n"; - // #endif - - m_recv_buffers.buffer(I, J, K) = reinterpret_cast(p); - m_recv_buffers.datatype(I, J, K) = DT; - m_recv_buffers.size(I, J, K) = s; - } - - /** Function to register buffers for received data with the communication patter. 
- - Values I, J and K are coordinates relative to calling process and - the buffer is the container for the data to be received from - that process. The amount of data is specified as number of - bytes. It is possible to override the previous pointer by - re-registering a new pointer with a given source. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - \param[in] p Pointer to the first element of type T where to put received data - \param[in] DT MPI Datatype - \param[in] s Number of bytes (not number of elements) expected - to be received. This is the data that is assumed to arrive. If - less data arrives, the behaviour is undefined. - */ - template - void register_receive_from_buffer(void *p, MPI_Datatype const &DT, int s) { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - BOOST_MPL_ASSERT_RELATION(K, >=, -1); - BOOST_MPL_ASSERT_RELATION(K, <=, 1); - - register_receive_from_buffer(p, DT, s, I, J, K); - } - - /* Setting sizes */ - - /** Function to set send buffers sizes if the size must be updated - from a previous registration. The same pointer passed during - registration will be used to send data. It is possible to - override the previous pointer by re-registering a new pointer - with a given destination. - - Values I, J and K are coordinates relative to calling process and - the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. - - \param[in] s Number of bytes (not number of elements) to be sent. 
- \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - \param[in] K Relative coordinates of the receiving process along the third dimension - */ - void set_send_to_size(int s, int I, int J, int K) { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - assert((K >= -1 && K <= 1)); - - m_send_buffers.size(I, J, K) = s; - } - - /** Function to set send buffers sizes if the size must be updated - from a previous registration. The same pointer passed during - registration will be used to send data. It is possible to - override the previous pointer by re-registering a new pointer - with a given destination. - - Values I, J and K are coordinates relative to calling process and - the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - \param[in] s Number of bytes (not number of elements) to be sent. - */ - template - void set_send_to_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - BOOST_MPL_ASSERT_RELATION(K, >=, -1); - BOOST_MPL_ASSERT_RELATION(K, <=, 1); - - set_send_to_size(s, I, J, K); - } - - /** Function to set receive buffers sizes if the size must be - updated from a previous registration. The same pointer passed - during registration will be used to receive data. It is - possible to override the previous pointer by re-registering a - new pointer with a given source. 
- - Values I, J and K are coordinates relative to calling process and - the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. - - \param[in] s Number of bytes (not number of elements) to be packed. - \param[in] I Relative coordinates of the receiving process along the first dimension - \param[in] J Relative coordinates of the receiving process along the second dimension - \param[in] K Relative coordinates of the receiving process along the third dimension - */ - void set_receive_from_size(int s, int I, int J, int K) const { - assert((I >= -1 && I <= 1)); - assert((J >= -1 && J <= 1)); - assert((K >= -1 && K <= 1)); - - m_send_buffers.size(I, J, K) = s; - } - - /** Function to set receive buffers sizes if the size must be - updated from a previous registration. The same pointer passed - during registration will be used to receive data. It is - possible to override the previous pointer by re-registering a - new pointer with a given source. - - Values I and J are coordinates relative to calling process and - the buffer is the container for the data to be sent to that - process. The amount of data is specified as number of bytes. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - \param[in] s Number of bytes (not number of elements) to be packed. - */ - template - void set_receive_from_size(int s) const { - BOOST_MPL_ASSERT_RELATION(I, >=, -1); - BOOST_MPL_ASSERT_RELATION(I, <=, 1); - BOOST_MPL_ASSERT_RELATION(J, >=, -1); - BOOST_MPL_ASSERT_RELATION(J, <=, 1); - BOOST_MPL_ASSERT_RELATION(K, >=, -1); - BOOST_MPL_ASSERT_RELATION(K, <=, 1); - - set_receive_from_size(s, I, J, K); - } - - /** Retrieve the size of the buffer containing data to be sent to neighbor I, J, K. 
- - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - */ - int send_size(int I, int J, int K) const { return m_send_buffers.size(I, J, K); } - - /** Retrieve the size of the buffer containing data to be received from neighbor I, J, K. - - \tparam I Relative coordinates of the receiving process along the first dimension - \tparam J Relative coordinates of the receiving process along the second dimension - \tparam K Relative coordinates of the receiving process along the third dimension - */ - int recv_size(int I, int J, int K) const { return m_recv_buffers.size(I, J, K); } - - /** When called this function executes the communication pattern, - that is, send all the send-buffers to the correspondinf - receive-buffers. When the function returns the data in receive - buffers can be safely accessed. 
- */ - void exchange() { - start_exchange(); - wait(); - } - - void post_receives() { - /* Posting receives face -1 - */ - if (m_proc_grid.template proc<1, 0, -1>() != -1) { - post_receive<1, 0, -1>(); - } - - if (m_proc_grid.template proc<-1, 0, -1>() != -1) { - post_receive<-1, 0, -1>(); - } - - if (m_proc_grid.template proc<0, 1, -1>() != -1) { - post_receive<0, 1, -1>(); - } - - if (m_proc_grid.template proc<0, -1, -1>() != -1) { - post_receive<0, -1, -1>(); - } - - /* Posting receives FOR CORNERS face -1 - */ - if (m_proc_grid.template proc<1, 1, -1>() != -1) { - post_receive<1, 1, -1>(); - } - - if (m_proc_grid.template proc<-1, -1, -1>() != -1) { - post_receive<-1, -1, -1>(); - } - - if (m_proc_grid.template proc<1, -1, -1>() != -1) { - post_receive<1, -1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1, -1>() != -1) { - post_receive<-1, 1, -1>(); - } - - if (m_proc_grid.template proc<0, 0, -1>() != -1) { - post_receive<0, 0, -1>(); - } - - /* Posting receives face 0 - */ - if (m_proc_grid.template proc<1, 0, 0>() != -1) { - post_receive<1, 0, 0>(); - } - - if (m_proc_grid.template proc<-1, 0, 0>() != -1) { - post_receive<-1, 0, 0>(); - } - - if (m_proc_grid.template proc<0, 1, 0>() != -1) { - post_receive<0, 1, 0>(); - } - - if (m_proc_grid.template proc<0, -1, 0>() != -1) { - post_receive<0, -1, 0>(); - } - - /* Posting receives FOR CORNERS face 0 - */ - if (m_proc_grid.template proc<1, 1, 0>() != -1) { - post_receive<1, 1, 0>(); - } - - if (m_proc_grid.template proc<-1, -1, 0>() != -1) { - post_receive<-1, -1, 0>(); - } - - if (m_proc_grid.template proc<1, -1, 0>() != -1) { - post_receive<1, -1, 0>(); - } - - if (m_proc_grid.template proc<-1, 1, 0>() != -1) { - post_receive<-1, 1, 0>(); - } - - /* Posting receives face 1 - */ - if (m_proc_grid.template proc<1, 0, 1>() != -1) { - post_receive<1, 0, 1>(); - } - - if (m_proc_grid.template proc<-1, 0, 1>() != -1) { - post_receive<-1, 0, 1>(); - } - - if (m_proc_grid.template proc<0, 1, 1>() != -1) { - 
post_receive<0, 1, 1>(); - } - - if (m_proc_grid.template proc<0, -1, 1>() != -1) { - post_receive<0, -1, 1>(); - } - - /* Posting receives FOR CORNERS face 1 - */ - if (m_proc_grid.template proc<1, 1, 1>() != -1) { - post_receive<1, 1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1, 1>() != -1) { - post_receive<-1, -1, 1>(); - } - - if (m_proc_grid.template proc<1, -1, 1>() != -1) { - post_receive<1, -1, 1>(); - } - - if (m_proc_grid.template proc<-1, 1, 1>() != -1) { - post_receive<-1, 1, 1>(); - } - - if (m_proc_grid.template proc<0, 0, 1>() != -1) { - post_receive<0, 0, 1>(); - } - } - - void do_sends() { - /* Sending data face -1 - */ - if (m_proc_grid.template proc<-1, 0, -1>() != -1) { - perform_isend<-1, 0, -1>(); - } - - if (m_proc_grid.template proc<1, 0, -1>() != -1) { - perform_isend<1, 0, -1>(); - } - - if (m_proc_grid.template proc<0, -1, -1>() != -1) { - perform_isend<0, -1, -1>(); - } - - if (m_proc_grid.template proc<0, 1, -1>() != -1) { - perform_isend<0, 1, -1>(); - } - - /* Sending data CORNERS - */ - if (m_proc_grid.template proc<-1, -1, -1>() != -1) { - perform_isend<-1, -1, -1>(); - } - - if (m_proc_grid.template proc<1, 1, -1>() != -1) { - perform_isend<1, 1, -1>(); - } - - if (m_proc_grid.template proc<1, -1, -1>() != -1) { - perform_isend<1, -1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1, -1>() != -1) { - perform_isend<-1, 1, -1>(); - } - - if (m_proc_grid.template proc<0, 0, -1>() != -1) { - perform_isend<0, 0, -1>(); - } - - /* Sending data face 0 - */ - if (m_proc_grid.template proc<-1, 0, 0>() != -1) { - perform_isend<-1, 0, 0>(); - } - - if (m_proc_grid.template proc<1, 0, 0>() != -1) { - perform_isend<1, 0, 0>(); - } - - if (m_proc_grid.template proc<0, -1, 0>() != -1) { - perform_isend<0, -1, 0>(); - } - - if (m_proc_grid.template proc<0, 1, 0>() != -1) { - perform_isend<0, 1, 0>(); - } - - /* Sending data CORNERS - */ - if (m_proc_grid.template proc<-1, -1, 0>() != -1) { - perform_isend<-1, -1, 0>(); - } - - if 
(m_proc_grid.template proc<1, 1, 0>() != -1) { - perform_isend<1, 1, 0>(); - } - - if (m_proc_grid.template proc<1, -1, 0>() != -1) { - perform_isend<1, -1, 0>(); - } - - if (m_proc_grid.template proc<-1, 1, 0>() != -1) { - perform_isend<-1, 1, 0>(); - } - - /* Sending data face 1 - */ - if (m_proc_grid.template proc<-1, 0, 1>() != -1) { - perform_isend<-1, 0, 1>(); - } - - if (m_proc_grid.template proc<1, 0, 1>() != -1) { - perform_isend<1, 0, 1>(); - } - - if (m_proc_grid.template proc<0, -1, 1>() != -1) { - perform_isend<0, -1, 1>(); - } - - if (m_proc_grid.template proc<0, 1, 1>() != -1) { - perform_isend<0, 1, 1>(); - } - - /* Sending data CORNERS - */ - if (m_proc_grid.template proc<-1, -1, 1>() != -1) { - perform_isend<-1, -1, 1>(); - } - - if (m_proc_grid.template proc<1, 1, 1>() != -1) { - perform_isend<1, 1, 1>(); - } - - if (m_proc_grid.template proc<1, -1, 1>() != -1) { - perform_isend<1, -1, 1>(); - } - - if (m_proc_grid.template proc<-1, 1, 1>() != -1) { - perform_isend<-1, 1, 1>(); - } - - if (m_proc_grid.template proc<0, 0, 1>() != -1) { - perform_isend<0, 0, 1>(); - } - } - - /** When called this function initiate the data exchabge. When the - function returns the data has to be considered already to be - transfered. Buffers should not be considered safe to access - until the wait() function returns. 
- */ - void start_exchange() { - - // cout << GSL_pid() << " proc coords: " << r << " " << c << endl; - /* NORTH/IMINUS - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - | | | | | | | | | | | | | | | | |------- | | -------| - | | |---------| | | | | | | | r0 | | r>0 | | | | r>0 | - WEST | | | r>0 | | | | | | c>0 | | | | | | | | c0 | - EAST - JMINUS|---------| | | | | | | | | | | | | | | | | | | | |JPLUS - | | | | | | | | | | |------- | | -------| | | | | | | - |---------| |---------| |---------| |---------| |---------| |---------| |---------| |---------| - SOUTH/IPLUS - */ - - post_receives(); - - // UNCOMMENT THIS IF A DEADLOCK APPEARS BECAUSE SENDS HAS TO FOLLOW RECEIVES (TRUE IN SOME PLATFORMS) - // MPI_Barrier(GSL_WORLD); - - do_sends(); - } - - void wait() { - - /* Actual receives face -1 - */ - if (m_proc_grid.template proc<1, 0, -1>() != -1) { - wait<1, 0, -1>(); - } - - if (m_proc_grid.template proc<-1, 0, -1>() != -1) { - wait<-1, 0, -1>(); - } - - if (m_proc_grid.template proc<0, 1, -1>() != -1) { - wait<0, 1, -1>(); - } - - if (m_proc_grid.template proc<0, -1, -1>() != -1) { - wait<0, -1, -1>(); - } - - if (m_proc_grid.template proc<1, 1, -1>() != -1) { - wait<1, 1, -1>(); - } - - if (m_proc_grid.template proc<-1, -1, -1>() != -1) { - wait<-1, -1, -1>(); - } - - if (m_proc_grid.template proc<-1, 1, -1>() != -1) { - wait<-1, 1, -1>(); - } - - if (m_proc_grid.template proc<1, -1, -1>() != -1) { - wait<1, -1, -1>(); - } - - if (m_proc_grid.template proc<0, 0, -1>() != -1) { - wait<0, 0, -1>(); - } - - /* Actual receives face 0 - */ - if (m_proc_grid.template proc<1, 0, 0>() != -1) { - wait<1, 0, 0>(); - } - - if (m_proc_grid.template proc<-1, 0, 0>() != -1) { - wait<-1, 0, 0>(); - } - - if (m_proc_grid.template proc<0, 1, 0>() != -1) { - wait<0, 1, 0>(); - } - - if (m_proc_grid.template proc<0, -1, 0>() != -1) { - wait<0, -1, 0>(); - } - - if (m_proc_grid.template proc<1, 1, 0>() != -1) { - wait<1, 1, 0>(); 
- } - - if (m_proc_grid.template proc<-1, -1, 0>() != -1) { - wait<-1, -1, 0>(); - } - - if (m_proc_grid.template proc<-1, 1, 0>() != -1) { - wait<-1, 1, 0>(); - } - - if (m_proc_grid.template proc<1, -1, 0>() != -1) { - wait<1, -1, 0>(); - } - - /* Actual receives face -1 - */ - if (m_proc_grid.template proc<1, 0, 1>() != -1) { - wait<1, 0, 1>(); - } - - if (m_proc_grid.template proc<-1, 0, 1>() != -1) { - wait<-1, 0, 1>(); - } - - if (m_proc_grid.template proc<0, 1, 1>() != -1) { - wait<0, 1, 1>(); - } - - if (m_proc_grid.template proc<0, -1, 1>() != -1) { - wait<0, -1, 1>(); - } - - if (m_proc_grid.template proc<1, 1, 1>() != -1) { - wait<1, 1, 1>(); - } - - if (m_proc_grid.template proc<-1, -1, 1>() != -1) { - wait<-1, -1, 1>(); - } - - if (m_proc_grid.template proc<-1, 1, 1>() != -1) { - wait<-1, 1, 1>(); - } - - if (m_proc_grid.template proc<1, -1, 1>() != -1) { - wait<1, -1, 1>(); - } - - if (m_proc_grid.template proc<0, 0, 1>() != -1) { - wait<0, 0, 1>(); - } - - // MPI_Barrier(gridtools::GCL_WORLD); - } - }; - -} // namespace gridtools diff --git a/include/gridtools/communication/low_level/helper.hpp b/include/gridtools/communication/low_level/helper.hpp deleted file mode 100644 index 1ab00c48b7..0000000000 --- a/include/gridtools/communication/low_level/helper.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. 
- * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once -#include "../GCL.hpp" - -#ifdef GCL_HOSTWORKAROUND - -#include "../../common/cuda_util.hpp" - -namespace gridtools { - namespace _impl { - enum alloc_type { host_normal, host_page_locked }; - template - struct helper_alloc {}; - - // manage page locked memory on the host - template - struct helper_alloc { - - static T *alloc(size_t sz) { - if (sz) { - T *ptr; - GT_CUDA_CHECK(cudaMallocHost(&ptr, sz * sizeof(T))); - return ptr; - } else { - return nullptr; - } - } - - static void free(T *t) { GT_CUDA_CHECK(cudaFreeHost(t)); } - - static T *realloc(T *t, size_t sz) { - free(t); - return alloc(sz); - } - }; - - // manage normal memory on the host - template - struct helper_alloc { - - static T *alloc(size_t sz) { - if (sz) { - T *ptr = malloc(sz); - return ptr; - } else { - return 0; - } - } - - static void free(T *t) { free(t); } - - static T *realloc(T *t, size_t sz) { - free(t); - return alloc(sz); - } - }; - } // namespace _impl -} // namespace gridtools -#endif diff --git a/include/gridtools/communication/low_level/proc_grids.hpp b/include/gridtools/communication/low_level/proc_grids.hpp deleted file mode 100644 index 415b3bd6a4..0000000000 --- a/include/gridtools/communication/low_level/proc_grids.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * GridTools - * - * Copyright (c) 2014-2019, ETH Zurich - * All rights reserved. - * - * Please, refer to the LICENSE file in the root directory. 
- * SPDX-License-Identifier: BSD-3-Clause - */ -#pragma once - -// This file needs to be changed - -#include -#include diff --git a/include/gridtools/distributed_boundaries/comm_traits.hpp b/include/gridtools/distributed_boundaries/comm_traits.hpp index 7f8a240136..e825977f4c 100644 --- a/include/gridtools/distributed_boundaries/comm_traits.hpp +++ b/include/gridtools/distributed_boundaries/comm_traits.hpp @@ -41,10 +41,8 @@ namespace gridtools { }; using proc_layout = gridtools::layout_map<0, 1, 2>; - using proc_grid_type = gridtools::MPI_3D_process_grid_t<3>; using comm_arch_type = Arch; using compute_arch = typename compute_arch_of::type; - static constexpr int version = gridtools::version_manual; using data_layout = typename StorageType::storage_info_t::layout_t; using value_type = typename StorageType::data_t; }; diff --git a/include/gridtools/distributed_boundaries/distributed_boundaries.hpp b/include/gridtools/distributed_boundaries/distributed_boundaries.hpp index fda5f38bf3..353c7236a0 100644 --- a/include/gridtools/distributed_boundaries/distributed_boundaries.hpp +++ b/include/gridtools/distributed_boundaries/distributed_boundaries.hpp @@ -106,9 +106,7 @@ namespace gridtools { using pattern_type = halo_exchange_dynamic_ut; + typename CTraits::comm_arch_type>; private: using performance_meter_t = typename timer_traits::timer_type; @@ -216,7 +214,7 @@ namespace gridtools { boundary_only(jobs...); } - typename CTraits::proc_grid_type const &proc_grid() const { return m_he.comm(); } + typename pattern_type::grid_type const &proc_grid() const { return m_he.comm(); } std::string print_meters() const { return m_meter_pack.to_string() + "\n" + m_meter_exchange.to_string() + "\n" + m_meter_bc.to_string(); @@ -248,9 +246,9 @@ namespace gridtools { /*Apply boundary to data*/ call_apply(boundary>(m_halos, + proc_grid_predicate>(m_halos, bcapply.boundary_to_apply(), - proc_grid_predicate(m_he.comm())), + proc_grid_predicate(m_he.comm())), bcapply.stores(), 
meta::make_integer_sequence::value>{}); } diff --git a/include/gridtools/distributed_boundaries/mock_pattern.hpp b/include/gridtools/distributed_boundaries/mock_pattern.hpp index cc79f7fa87..9d9ab24ca3 100644 --- a/include/gridtools/distributed_boundaries/mock_pattern.hpp +++ b/include/gridtools/distributed_boundaries/mock_pattern.hpp @@ -66,11 +66,13 @@ namespace gridtools { MPI_3D_process_grid_t<3> proc_grid() const { return m_comm; } }; - template + template struct halo_exchange_dynamic_ut { boollist<3> m_period; MPI_3D_process_grid_t<3> m_comm; + using grid_type = MPI_3D_process_grid_t<3>; + template halo_exchange_dynamic_ut(boollist<3> p, A) : m_period{p}, m_comm{p, 0} { if ((m_period.value(0) != false) or (m_period.value(1) != false) or (m_period.value(2) != false)) { diff --git a/regression/communication/test_halo_exchange_3D_all.cpp b/regression/communication/test_halo_exchange_3D_all.cpp index 689bd4da85..2971693188 100644 --- a/regression/communication/test_halo_exchange_3D_all.cpp +++ b/regression/communication/test_halo_exchange_3D_all.cpp @@ -95,14 +95,11 @@ namespace halo_exchange_3D_all { logically to processor (p+1,q,r). The other dimensions goes as the others. */ - static const int version = gridtools::version_manual; typedef gridtools::halo_exchange_dynamic_ut, triple_t::data_type, - gridtools::MPI_3D_process_grid_t<3>, - arch_type, - version> + arch_type> pattern_type; /* The pattern is now instantiated with the periodicities and the diff --git a/regression/communication/test_halo_exchange_3D_all_2.cpp b/regression/communication/test_halo_exchange_3D_all_2.cpp index 5f8af6d619..88d88173ad 100644 --- a/regression/communication/test_halo_exchange_3D_all_2.cpp +++ b/regression/communication/test_halo_exchange_3D_all_2.cpp @@ -99,14 +99,11 @@ namespace halo_exchange_3D_all_2 { logically to processor (p+1,q,r). The other dimensions goes as the others. 
*/ - static const int version = gridtools::version_manual; typedef gridtools::halo_exchange_dynamic_ut, triple_t::data_type, - gridtools::MPI_3D_process_grid_t<3>, - arch_type, - version> + arch_type> pattern_type; /* The pattern is now instantiated with the periodicities and the diff --git a/regression/communication/test_halo_exchange_3D_all_3.cpp b/regression/communication/test_halo_exchange_3D_all_3.cpp index 3886f75972..bf069677b4 100644 --- a/regression/communication/test_halo_exchange_3D_all_3.cpp +++ b/regression/communication/test_halo_exchange_3D_all_3.cpp @@ -98,16 +98,11 @@ namespace halo_exchange_3D_all_3 { logically to processor (p+1,q,r). The other dimensions goes as the others. */ - static const int version = - gridtools::version_manual; // 0 is the usual version, 1 is the one that build the whole - // datatype (Only vector interface supported) typedef gridtools::halo_exchange_dynamic_ut, triple_t::data_type, - gridtools::MPI_3D_process_grid_t<3>, - arch_type, - version> + arch_type> pattern_type; /* The pattern is now instantiated with the periodicities and the diff --git a/regression/communication/test_halo_exchange_3D_generic.cpp b/regression/communication/test_halo_exchange_3D_generic.cpp index 333a7c3c1e..8bbdab9eca 100644 --- a/regression/communication/test_halo_exchange_3D_generic.cpp +++ b/regression/communication/test_halo_exchange_3D_generic.cpp @@ -45,10 +45,6 @@ namespace halo_exchange_3D_generic { double lapse_time3; double lapse_time4; -#ifndef PACKING_TYPE -#define PACKING_TYPE gridtools::version_manual -#endif - #define B_ADD 1 #define C_ADD 2 @@ -103,8 +99,7 @@ namespace halo_exchange_3D_generic { logically to processor (p+1,q,r). The other dimensions goes as the others. */ - typedef gridtools::halo_exchange_generic, 3, arch_type, PACKING_TYPE> - pattern_type; + typedef gridtools::halo_exchange_generic, arch_type> pattern_type; /* The pattern is now instantiated with the periodicities and the communicator. 
The periodicity of the communicator is diff --git a/regression/communication/test_halo_exchange_3D_generic_full.cpp b/regression/communication/test_halo_exchange_3D_generic_full.cpp index d907a2da6e..8f1f5dbd29 100644 --- a/regression/communication/test_halo_exchange_3D_generic_full.cpp +++ b/regression/communication/test_halo_exchange_3D_generic_full.cpp @@ -114,9 +114,7 @@ namespace halo_exchange_3D_generic_full { logically to processor (p+1,q,r). The other dimensions goes as the others. */ - typedef gridtools:: - halo_exchange_generic, 3, arch_type, gridtools::version_manual> - pattern_type; + typedef gridtools::halo_exchange_generic, arch_type> pattern_type; /* The pattern is now instantiated with the periodicities and the communicator. The periodicity of the communicator is diff --git a/regression/copy_stencil_parallel.hpp b/regression/copy_stencil_parallel.hpp index d73c65ce61..aa82a4cdb8 100644 --- a/regression/copy_stencil_parallel.hpp +++ b/regression/copy_stencil_parallel.hpp @@ -80,13 +80,11 @@ namespace copy_stencil { typedef gridtools::halo_exchange_dynamic_ut, float_type, - MPI_3D_process_grid_t<3>, #ifdef __CUDACC__ - gridtools::gcl_gpu, + gridtools::gcl_gpu> #else - gridtools::gcl_cpu, + gridtools::gcl_cpu> #endif - gridtools::version_manual> pattern_type; pattern_type he(gridtools::boollist<3>(false, false, false), CartComm); diff --git a/regression/shallow_water_enhanced.hpp b/regression/shallow_water_enhanced.hpp index de76c422ef..b5e547624b 100644 --- a/regression/shallow_water_enhanced.hpp +++ b/regression/shallow_water_enhanced.hpp @@ -259,13 +259,11 @@ namespace shallow_water { typedef gridtools::halo_exchange_dynamic_ut, float_type, - MPI_3D_process_grid_t<3>, #ifdef __CUDACC__ - gridtools::gcl_gpu, + gridtools::gcl_gpu> #else - gridtools::gcl_cpu, + gridtools::gcl_cpu> #endif - gridtools::version_manual> pattern_type; pattern_type he(gridtools::boollist<3>(false, false, false), CartComm);