diff --git a/INSTALL.md b/INSTALL.md index f2891672e2..ed0ba5046c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -43,7 +43,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b - [Range-V3](https://github.com/ericniebler/range-v3.git) -- a Ranges library that served as the basis for Ranges component of C++20 and later. - [BTAS](http://github.com/ValeevGroup/BTAS), tag 1cfcb12647c768ccd83b098c64cda723e1275e49 . If usable BTAS installation is not found, TiledArray will download and compile BTAS from source. *This is the recommended way to compile BTAS for all users*. -- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 95589b0d020a076f93d02eead6da654b23dd3d91 . +- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 93a9a5cec2a8fa87fba3afe8056607e6062a9058 . Only the MADworld runtime and BLAS/LAPACK C API component of MADNESS is used by TiledArray. If usable MADNESS installation is not found, TiledArray will download and compile MADNESS from source. *This is the recommended way to compile MADNESS for all users*. 
diff --git a/external/umpire.cmake b/external/umpire.cmake index ee2fa490e1..5b7a4f4078 100644 --- a/external/umpire.cmake +++ b/external/umpire.cmake @@ -223,6 +223,8 @@ else() "$;$;$;$;$;$;$" INTERFACE_LINK_LIBRARIES "$;$" + INTERFACE_COMPILE_DEFINITIONS + FMT_HEADER_ONLY=1 ) install(TARGETS TiledArray_UMPIRE EXPORT tiledarray COMPONENT tiledarray) diff --git a/external/versions.cmake b/external/versions.cmake index fbd0399ee8..ef1505a5b8 100644 --- a/external/versions.cmake +++ b/external/versions.cmake @@ -11,8 +11,8 @@ set(TA_INSTALL_EIGEN_PREVIOUS_VERSION 3.3.7) set(TA_INSTALL_EIGEN_URL_HASH SHA256=b4c198460eba6f28d34894e3a5710998818515104d6e74e5cc331ce31e46e626) set(TA_INSTALL_EIGEN_PREVIOUS_URL_HASH MD5=b9e98a200d2455f06db9c661c5610496) -set(TA_TRACKED_MADNESS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) -set(TA_TRACKED_MADNESS_PREVIOUS_TAG 96ac90e8f193ccfaf16f346b4652927d2d362e75) +set(TA_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058) +set(TA_TRACKED_MADNESS_PREVIOUS_TAG 95589b0d020a076f93d02eead6da654b23dd3d91) set(TA_TRACKED_MADNESS_VERSION 0.10.1) set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1) diff --git a/python/src/TiledArray/python/array.h b/python/src/TiledArray/python/array.h index 782846df4c..e3cc1c79b7 100644 --- a/python/src/TiledArray/python/array.h +++ b/python/src/TiledArray/python/array.h @@ -208,7 +208,7 @@ void make_array_class(py::object m, const char *name) { py::return_value_policy::reference) .def_property_readonly("trange", &array::trange) .def_property_readonly("shape", &array::shape) - .def("fill", &Array::fill, py::arg("value"), + .def("fill", &Array::template fill<>, py::arg("value"), py::arg("skip_set") = false) .def("init", &array::init_tiles) // Array object needs be alive while iterator is used */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 80f2a49710..a16c05d0b2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -225,7 +225,7 @@ if(HIP_FOUND OR CUDA_FOUND) TiledArray/external/cuda.h 
TiledArray/device/cpu_cuda_vector.h) endif(CUDA_FOUND) -endif(CUDA_FOUND OR HIP_FOUND) +endif(HIP_FOUND OR CUDA_FOUND) set(TILEDARRAY_SOURCE_FILES TiledArray/tiledarray.cpp diff --git a/src/TiledArray/array_impl.h b/src/TiledArray/array_impl.h index df7138a9e7..9dbf5640c4 100644 --- a/src/TiledArray/array_impl.h +++ b/src/TiledArray/array_impl.h @@ -198,6 +198,17 @@ std::ostream& operator<<(std::ostream& os, const TileConstReference& a) { return os; } +/// Callback used to update counter (typically, task counter) +template +struct IncrementCounter : public madness::CallbackInterface { + AtomicInt& counter; + IncrementCounter(AtomicInt& counter) : counter(counter) {} + void notify() override { + ++counter; + delete this; + } +}; + } // namespace detail } // namespace TiledArray @@ -770,20 +781,24 @@ class ArrayImpl : public TensorImpl, /// \tparam Op The type of the functor/function /// \param[in] op The operation used to generate tiles /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not set. Strong throw /// guarantee. /// \throw TiledArray::Exception if a tile is already set and skip_set is /// false. Weak throw guarantee. - template - void init_tiles(Op&& op, bool skip_set = false) { + template + std::int64_t init_tiles(Op&& op, bool skip_set = false) { // lifetime management of op depends on whether it is a lvalue ref (i.e. 
has // an external owner) or an rvalue ref // - if op is an lvalue ref: pass op to tasks // - if op is an rvalue ref pass make_shared_function(op) to tasks auto op_shared_handle = make_op_shared_handle(std::forward(op)); + std::int64_t ntiles_initialized{0}; auto it = this->pmap()->begin(); const auto end = this->pmap()->end(); + std::atomic ntask_completed{0}; for (; it != end; ++it) { const auto& index = *it; if (!this->is_zero(index)) { @@ -792,19 +807,39 @@ class ArrayImpl : public TensorImpl, if (fut.probe()) continue; } if constexpr (Exec == HostExecutor::MADWorld) { - Future tile = this->world().taskq.add( - [this_sptr = this->shared_from_this(), - index = ordinal_type(index), op_shared_handle]() -> value_type { + Future tile = + this->world().taskq.add([this_sptr = this->shared_from_this(), + index = ordinal_type(index), + op_shared_handle, this]() -> value_type { return op_shared_handle( this_sptr->trange().make_tile_range(index)); }); + ++ntiles_initialized; + if constexpr (fence == Fence::Local) { + tile.register_callback( + new IncrementCounter( + ntask_completed)); + } set(index, std::move(tile)); } else { static_assert(Exec == HostExecutor::Thread); set(index, op_shared_handle(this->trange().make_tile_range(index))); + ++ntiles_initialized; } } } + + if constexpr (fence == Fence::Local) { + if constexpr (Exec == HostExecutor::MADWorld) { + if (ntiles_initialized > 0) + this->world().await([&ntask_completed, ntiles_initialized]() { + return ntask_completed == ntiles_initialized; + }); + } + } else if constexpr (fence == Fence::Global) { + this->world().gop.fence(); + } + return ntiles_initialized; } }; // class ArrayImpl diff --git a/src/TiledArray/conversions/foreach.h b/src/TiledArray/conversions/foreach.h index 20f2d36ec3..2c77c91a0f 100644 --- a/src/TiledArray/conversions/foreach.h +++ b/src/TiledArray/conversions/foreach.h @@ -283,11 +283,10 @@ inline std:: arg.trange().tiles_range(), 0); // Construct the task function used to construct the result 
tiles. - madness::AtomicInt counter; - counter = 0; - int task_count = 0; + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; auto op_shared_handle = make_op_shared_handle(std::forward(op)); - const auto task = [op_shared_handle, &counter, &tile_norms]( + const auto task = [op_shared_handle, &tile_norms]( const ordinal_type ord, const_if_t& arg_tile, const ArgTiles&... arg_tiles) -> result_value_type { @@ -295,7 +294,6 @@ inline std:: auto result_tile = op_caller(std::move(op_shared_handle), tile_norms.at_ordinal(ord), arg_tile, arg_tiles...); - ++counter; return result_tile; }; @@ -310,7 +308,9 @@ inline std:: continue; auto result_tile = world.taskq.add(task, ord, arg.find_local(ord), args.find(ord)...); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new IncrementCounter(ntask_completed)); tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, use the (scaled) // norms of the first arg @@ -324,7 +324,9 @@ inline std:: auto result_tile = world.taskq.add(task, ord, detail::get_sparse_tile(ord, arg), detail::get_sparse_tile(ord, args)...); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new IncrementCounter(ntask_completed)); tiles.emplace_back(ord, std::move(result_tile)); if (op_returns_void) // if Op does not evaluate norms, find max // (scaled) norms of all args @@ -339,9 +341,10 @@ inline std:: } // Wait for tile norm data to be collected. 
- if (task_count > 0) - world.await( - [&counter, task_count]() -> bool { return counter == task_count; }); + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_created == ntask_completed; + }); // Construct the new array result_array_type result( diff --git a/src/TiledArray/conversions/make_array.h b/src/TiledArray/conversions/make_array.h index 6f5ada0bba..1295e6f8e4 100644 --- a/src/TiledArray/conversions/make_array.h +++ b/src/TiledArray/conversions/make_array.h @@ -26,6 +26,7 @@ #ifndef TILEDARRAY_CONVERSIONS_MAKE_ARRAY_H__INCLUDED #define TILEDARRAY_CONVERSIONS_MAKE_ARRAY_H__INCLUDED +#include "TiledArray/array_impl.h" #include "TiledArray/external/madness.h" #include "TiledArray/shape.h" #include "TiledArray/type_traits.h" @@ -79,6 +80,10 @@ inline Array make_array( // Make an empty result array Array result(world, trange); + // Construct the task function used to construct the result tiles. + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; + // Iterate over local tiles of arg for (const auto index : *result.pmap()) { // Spawn a task to evaluate the tile @@ -89,11 +94,20 @@ inline Array make_array( return tile; }, trange.make_tile_range(index)); - + ++ntask_created; + tile.register_callback( + new detail::IncrementCounter( + ntask_completed)); // Store result tile - result.set(index, tile); + result.set(index, std::move(tile)); } + // Wait for tile tasks to complete + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_completed == ntask_created; + }); + return result; } @@ -150,26 +164,28 @@ inline Array make_array( trange.tiles_range(), 0); // Construct the task function used to construct the result tiles. 
- madness::AtomicInt counter; - counter = 0; - int task_count = 0; + std::atomic ntask_completed{0}; + std::int64_t ntask_created{0}; auto task = [&](const ordinal_type index) -> value_type { value_type tile; tile_norms.at_ordinal(index) = op(tile, trange.make_tile_range(index)); - ++counter; return tile; }; for (const auto index : *pmap) { auto result_tile = world.taskq.add(task, index); - ++task_count; + ++ntask_created; + result_tile.register_callback( + new detail::IncrementCounter( + ntask_completed)); tiles.emplace_back(index, std::move(result_tile)); } // Wait for tile norm data to be collected. - if (task_count > 0) - world.await( - [&counter, task_count]() -> bool { return counter == task_count; }); + if (ntask_created > 0) + world.await([&ntask_completed, ntask_created]() -> bool { + return ntask_completed == ntask_created; + }); // Construct the new array Array result(world, trange, diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index 3bc9fe3c62..1aa90ce351 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -906,23 +906,29 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. /// \throw TiledArray::Exception if skip_set is false and a local tile is /// already set. Weak throw guarantee. - void fill_local(const element_type& value = element_type(), - bool skip_set = false) { - init_tiles( + template + std::int64_t fill_local(const element_type& value = element_type(), + bool skip_set = false) { + return init_tiles( [value](const range_type& range) { return value_type(range, value); }, skip_set); } /// Fill all local tiles with the specified value + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \param[in] value What each local tile should be filled with. 
/// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is uninitialized. Strong throw /// guarantee. /// \throw TiledArray::Exception if skip_set is false and a local tile is /// already set. Weak throw guarantee. - void fill(const element_type& value = numeric_type(), bool skip_set = false) { - fill_local(value, skip_set); + template + std::int64_t fill(const element_type& value = numeric_type(), + bool skip_set = false) { + return fill_local(value, skip_set); } /// Fill all local tiles with random values @@ -934,18 +940,21 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// generate random values of type T this function will be disabled via SFINAE /// and attempting to use it will lead to a compile-time error. /// + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \tparam T The type of random value to generate. Defaults to /// element_type. /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarantee. /// \throw TiledArray::Exception if skip_set is false and a local tile is /// already initialized. Weak throw guarantee. 
template > - void fill_random(bool skip_set = false) { - init_elements( + std::int64_t fill_random(bool skip_set = false) { + return init_elements( [](const auto&) { return detail::MakeRandom::generate_value(); }); } @@ -978,6 +987,8 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// return tile; /// }); /// \endcode + /// \tparam fence If Fence::No, the operation will return early, + /// before the tasks have completed /// \tparam Op The type of the functor/function /// \param[in] op The operation used to generate tiles /// \param[in] skip_set If false, will throw if any tiles are already set @@ -985,9 +996,11 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// guarantee. /// \throw TiledArray::Exception if a tile is already set and skip_set is /// false. Weak throw guarantee. - template - void init_tiles(Op&& op, bool skip_set = false) { - impl_ref().template init_tiles(std::forward(op), skip_set); + template + std::int64_t init_tiles(Op&& op, bool skip_set = false) { + return impl_ref().template init_tiles(std::forward(op), + skip_set); } /// Initialize elements of local, non-zero tiles with a user provided functor @@ -1009,15 +1022,17 @@ class DistArray : public madness::archive::ParallelSerializableObject { /// \tparam Op Type of the function/functor which will generate the elements. /// \param[in] op The operation used to generate elements /// \param[in] skip_set If false, will throw if any tiles are already set + /// \return the total number of tiles that have been (or will be) initialized /// \throw TiledArray::Exception if the PIMPL is not initialized. Strong /// throw guarnatee. /// \throw TiledArray::Exception if skip_set is false and a local, non-zero /// tile is already initialized. Weak throw /// guarantee. 
- template - void init_elements(Op&& op, bool skip_set = false) { + template + std::int64_t init_elements(Op&& op, bool skip_set = false) { auto op_shared_handle = make_op_shared_handle(std::forward(op)); - init_tiles( + return init_tiles( [op = std::move(op_shared_handle)]( const TiledArray::Range& range) -> value_type { // Initialize the tile with the given range object diff --git a/src/TiledArray/fwd.h b/src/TiledArray/fwd.h index 652b835fab..e33aea5c18 100644 --- a/src/TiledArray/fwd.h +++ b/src/TiledArray/fwd.h @@ -203,6 +203,14 @@ using Array enum class HostExecutor { Thread, MADWorld, Default = MADWorld }; +/// fence types +enum class Fence { + Global, //!< global fence (`world.gop.fence()`) + Local, //!< local fence (all local work done, equivalent to + //!< `world.taskq.fence()` in absence of active messages) + No //!< no fence +}; + namespace conversions { /// user defined conversions diff --git a/src/TiledArray/special/diagonal_array.h b/src/TiledArray/special/diagonal_array.h index d60b23db94..eac0c65e92 100644 --- a/src/TiledArray/special/diagonal_array.h +++ b/src/TiledArray/special/diagonal_array.h @@ -157,7 +157,8 @@ std::enable_if_t::value, void> write_diag_tiles_to_array_rng(Array &A, RandomAccessIterator diagonals_begin) { using Tile = typename Array::value_type; - A.init_tiles( + // N.B. 
Fence::Local ensures lifetime of the diagonals range + A.template init_tiles( // Task to create each tile [diagonals_begin](const Range &rng) { // Compute range of diagonal elements in the tile @@ -221,7 +222,6 @@ diagonal_array(World &world, TiledRange const &trange, if constexpr (is_dense_v) { Array A(world, trange); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); - A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } else { // Compute shape and init the Array @@ -231,7 +231,6 @@ diagonal_array(World &world, TiledRange const &trange, ShapeType shape(shape_norm, trange); Array A(world, trange, shape); detail::write_diag_tiles_to_array_rng(A, diagonals_begin); - A.world().taskq.fence(); // ensure tasks outlive the diagonals_begin view return A; } abort(); // unreachable diff --git a/src/TiledArray/util/bug.cpp b/src/TiledArray/util/bug.cpp index 5e58ba667c..ff37f14343 100644 --- a/src/TiledArray/util/bug.cpp +++ b/src/TiledArray/util/bug.cpp @@ -166,7 +166,7 @@ void Debugger::set_prefix(const char *p) { void Debugger::set_prefix(int i) { char p[128]; - sprintf(p, "%3d: ", i); + snprintf(p, sizeof(p), "%3d: ", i); set_prefix(p); } diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index 998b0d8f9f..64f69e69db 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -60,7 +60,7 @@ namespace { std::string to_parallel_archive_file_name(const char* prefix_name, int rank) { char buf[256]; MADNESS_ASSERT(strlen(prefix_name) + 7 <= sizeof(buf)); - sprintf(buf, "%s.%5.5d", prefix_name, rank); + snprintf(buf, sizeof(buf), "%s.%5.5d", prefix_name, rank); return buf; } } // namespace @@ -716,7 +716,7 @@ BOOST_AUTO_TEST_CASE(parallel_serialization) { mktemp(archive_file_prefix_name); madness::archive::ParallelOutputArchive<> oar(world, archive_file_prefix_name, nio); - oar& a; + oar & a; oar.close(); madness::archive::ParallelInputArchive<> iar(world, archive_file_prefix_name, @@ -740,7 +740,7 @@ 
BOOST_AUTO_TEST_CASE(parallel_sparse_serialization) { mktemp(archive_file_prefix_name); madness::archive::ParallelOutputArchive<> oar(world, archive_file_prefix_name, nio); - oar& b; + oar & b; oar.close(); madness::archive::ParallelInputArchive<> iar(world, archive_file_prefix_name, @@ -783,7 +783,7 @@ BOOST_AUTO_TEST_CASE(issue_225) { madness::archive::BinaryFstreamInputArchive iar(archive_file_name); decltype(S) S_read; decltype(St) St_read; - iar& S_read& St_read; + iar & S_read & St_read; BOOST_CHECK_EQUAL(S_read.trange(), S.trange()); BOOST_REQUIRE(S_read.shape() == S.shape());