From ad60fdf9567c9a0d97875f6be96784b2903ffa14 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Mon, 14 Aug 2023 16:58:56 +0200 Subject: [PATCH 01/17] Create correct 0-dimensional chunks for fence commands --- src/distributed_graph_generator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distributed_graph_generator.cc b/src/distributed_graph_generator.cc index ff42be191..f08e24072 100644 --- a/src/distributed_graph_generator.cc +++ b/src/distributed_graph_generator.cc @@ -152,7 +152,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(tsk.get_type() == task_type::collective || tsk.get_type() == task_type::fence) { std::vector> chunks; for(size_t nid = 0; nid < m_num_nodes; ++nid) { - chunks.push_back(chunk_cast<3>(chunk<1>{id<1>{nid}, ones, {m_num_nodes}})); + chunks.push_back(chunk_cast<3>(chunk<1>{id<1>{tsk.get_type() == task_type::collective ? nid : 0}, ones, {m_num_nodes}})); } return chunks; } From db8f5540a324ffe97bf1faad2278383b3156e4ae Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 24 Jun 2023 20:33:19 +0200 Subject: [PATCH 02/17] Checked range/id casts, rename range/id_min/max --- include/accessor.h | 5 +-- include/buffer_manager.h | 4 +- include/buffer_storage.h | 4 +- include/fence.h | 2 +- include/ranges.h | 84 +++++++++++++++++++++++++++++----------- include/workaround.h | 8 ++++ src/buffer_manager.cc | 4 +- 7 files changed, 79 insertions(+), 32 deletions(-) diff --git a/include/accessor.h b/include/accessor.h index a3b6f0b38..f15ae059f 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -186,9 +186,8 @@ class accessor : public detail::accessor_base // We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor, // which does not set m_oob_indices. 
if(m_oob_indices != nullptr) { - const id all_true = detail::id_cast(id<3>(true, true, true)); - const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true; - const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true; + const bool is_within_bounds_lo = all_true(index >= m_accessed_virtual_subrange.offset); + const bool is_within_bounds_hi = all_true(index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)); if((!is_within_bounds_lo || !is_within_bounds_hi)) { for(int d = 0; d < Dims; ++d) { sycl::atomic_ref{m_oob_indices[0][d]}.fetch_min(index[d]); diff --git a/include/buffer_manager.h b/include/buffer_manager.h index 4ff9db73a..e5969caff 100644 --- a/include/buffer_manager.h +++ b/include/buffer_manager.h @@ -375,8 +375,8 @@ namespace detail { resize_info result; if(!is_inside_old_range) { result.resize_required = true; - result.new_offset = min_id(request_offset, buffer.offset); - result.new_range = range_cast<3>(id_cast<3>(max_range(old_abs_range, new_abs_range)) - result.new_offset); + result.new_offset = id_min(request_offset, buffer.offset); + result.new_range = range_cast<3>(id_cast<3>(range_max(old_abs_range, new_abs_range)) - result.new_offset); } return result; } diff --git a/include/buffer_storage.h b/include/buffer_storage.h index db621a8b7..8b20ed877 100644 --- a/include/buffer_storage.h +++ b/include/buffer_storage.h @@ -116,8 +116,8 @@ namespace detail { inline void assert_copy_is_in_range( const range<3>& source_range, const range<3>& target_range, const id<3>& source_offset, const id<3>& target_offset, const range<3>& copy_range) { - assert(max_range(source_range, range_cast<3>(source_offset + copy_range)) == source_range); - assert(max_range(target_range, range_cast<3>(target_offset + copy_range)) == target_range); + assert(range_max(source_range, range_cast<3>(source_offset + copy_range)) == source_range); + 
assert(range_max(target_range, range_cast<3>(target_offset + copy_range)) == target_range); } template diff --git a/include/fence.h b/include/fence.h index 5c3e48259..626dbcbe6 100644 --- a/include/fence.h +++ b/include/fence.h @@ -93,7 +93,7 @@ class buffer_fence_promise final : public detail::fence_promise { void fulfill() override { const auto access_info = runtime::get_instance().get_buffer_manager().access_host_buffer(get_buffer_id(m_buffer), access_mode::read, m_subrange); - assert((id_cast(access_info.backing_buffer_offset) <= m_subrange.offset) == id_cast(id<3>(true, true, true))); + assert(all_true(id_cast(access_info.backing_buffer_offset) <= m_subrange.offset)); auto data = std::make_unique(m_subrange.range.size()); memcpy_strided_host(access_info.ptr, data.get(), sizeof(DataT), range_cast(access_info.backing_buffer_range), m_subrange.offset - id_cast(access_info.backing_buffer_offset), m_subrange.range, {}, m_subrange.range); diff --git a/include/ranges.h b/include/ranges.h index ab956fc34..6e42c7c99 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -26,8 +26,16 @@ struct make_from_t { // and would otherwise be prohibited by strict-aliasing rules (because two identical pointers with the same type must point to the same object). template struct coordinate_storage { - constexpr size_t operator[](int dimension) const { return values[dimension]; } - constexpr size_t& operator[](int dimension) { return values[dimension]; } + constexpr size_t operator[](int dimension) const { + CELERITY_DETAIL_ASSERT_ON_HOST(dimension < Dims); + return values[dimension]; + } + + constexpr size_t& operator[](int dimension) { + CELERITY_DETAIL_ASSERT_ON_HOST(dimension < Dims); + return values[dimension]; + } + size_t values[Dims] = {}; }; @@ -59,7 +67,7 @@ class coordinate { template )>> constexpr coordinate(const size_t dim_0, const Values... 
dim_n) : m_values{{dim_0, static_cast(dim_n)...}} {} - constexpr size_t get(int dimension) { return m_values[dimension]; } + constexpr size_t get(int dimension) const { return m_values[dimension]; } constexpr size_t& operator[](int dimension) { return m_values[dimension]; } constexpr size_t operator[](int dimension) const { return m_values[dimension]; } @@ -208,18 +216,19 @@ class coordinate { CELERITY_DETAIL_NO_UNIQUE_ADDRESS coordinate_storage m_values; }; -template -InterfaceOut coordinate_cast(const coordinate& in) { +template +InterfaceOut coordinate_cast(const InterfaceIn& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= InterfaceOut::dimensions); return InterfaceOut(make_from, in); } -template -range range_cast(const coordinate& in) { +template +range range_cast(const InterfaceIn& in) { return coordinate_cast>(in); } -template -id id_cast(const coordinate& in) { +template +id id_cast(const InterfaceIn& in) { return coordinate_cast>(in); } @@ -278,11 +287,19 @@ class range : public detail::coordinate, Dims> { } } + /// Returns the smallest dimensionality that `*this` can be `range_cast` to. + int get_min_dimensions() const { + for(int dims = Dims; dims > 0; --dims) { + if((*this)[dims - 1] > 1) return dims; + } + return 0; + } + private: friend class detail::coordinate, Dims>; - template - friend InterfaceOut detail::coordinate_cast(const detail::coordinate& in); + template + friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); template > constexpr range() noexcept {} @@ -336,9 +353,17 @@ class id : public detail::coordinate, Dims> { } } + /// Returns the smallest dimensionality that `*this` can be `id_cast` to. 
+ int get_min_dimensions() const { + for(int dims = Dims; dims > 0; --dims) { + if((*this)[dims - 1] > 0) { return dims; } + } + return 0; + } + private: - template - friend InterfaceOut detail::coordinate_cast(const detail::coordinate& in); + template + friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); template constexpr id(const detail::make_from_t /* tag */, const detail::coordinate& in) @@ -446,9 +471,9 @@ namespace detail { inline size_t get_linear_index(const range<3>& range, const id<3>& index) { return index[0] * range[1] * range[2] + index[1] * range[2] + index[2]; } -#define MAKE_COMPONENT_WISE_BINARY_FN(name, range_type, op) \ +#define CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(name, coord, op) \ template \ - range_type name(const range_type& a, const range_type& b) { \ + coord name(const coord& a, const coord& b) { \ auto result = a; \ for(int d = 0; d < Dims; ++d) { \ result[d] = op(result[d], b[d]); \ @@ -456,12 +481,21 @@ namespace detail { return result; \ } - MAKE_COMPONENT_WISE_BINARY_FN(min_range, range, std::min) - MAKE_COMPONENT_WISE_BINARY_FN(max_range, range, std::max) - MAKE_COMPONENT_WISE_BINARY_FN(min_id, id, std::min) - MAKE_COMPONENT_WISE_BINARY_FN(max_id, id, std::max) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(range_min, range, std::min) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(range_max, range, std::max) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(id_min, id, std::min) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(id_max, id, std::max) + +#undef CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN -#undef MAKE_COMPONENT_WISE_BINARY_FN + template + bool all_true(const coordinate &bools) { + for(int d = 0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(bools[d] == 0 || bools[d] == 1); + if(bools[d] == 0) return false; + } + return true; + } } // namespace detail @@ -478,6 +512,9 @@ struct chunk { chunk(const id& offset, const celerity::range& range, const celerity::range& global_size) : offset(offset), range(range), global_size(global_size) {} 
+ /// Returns the smallest dimensionality that `*this` can be `chunk_cast` to. + int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions(), global_size.get_min_dimensions()}); } + friend bool operator==(const chunk& lhs, const chunk& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range && lhs.global_size == rhs.global_size; } @@ -492,11 +529,12 @@ struct subrange { CELERITY_DETAIL_NO_UNIQUE_ADDRESS celerity::range range = detail::zeros; subrange() = default; - subrange(const id& offset, const celerity::range& range) : offset(offset), range(range) {} - subrange(const chunk& other) : offset(other.offset), range(other.range) {} + /// Returns the smallest dimensionality that `*this` can be `subrange_cast` to. + int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions()}); } + friend bool operator==(const subrange& lhs, const subrange& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range; } friend bool operator!=(const subrange& lhs, const subrange& rhs) { return !operator==(lhs, rhs); } }; @@ -505,11 +543,13 @@ namespace detail { template chunk chunk_cast(const chunk& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); return chunk{detail::id_cast(other.offset), detail::range_cast(other.range), detail::range_cast(other.global_size)}; } template subrange subrange_cast(const subrange& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); return subrange{detail::id_cast(other.offset), detail::range_cast(other.range)}; } diff --git a/include/workaround.h b/include/workaround.h index eccc5eb89..5ef8fd861 100644 --- a/include/workaround.h +++ b/include/workaround.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -41,3 +43,9 @@ #define CELERITY_DETAIL_HAS_NO_UNIQUE_ADDRESS false #define CELERITY_DETAIL_NO_UNIQUE_ADDRESS #endif + +#if CELERITY_DETAIL_ENABLE_DEBUG && 
!defined(__SYCL_DEVICE_ONLY__) +#define CELERITY_DETAIL_ASSERT_ON_HOST(...) assert(__VA_ARGS__) +#else +#define CELERITY_DETAIL_ASSERT_ON_HOST(...) +#endif diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index 10d60dae0..00c89855e 100644 --- a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -73,7 +73,7 @@ namespace detail { buffer_manager::access_info buffer_manager::access_device_buffer(buffer_id bid, access_mode mode, const subrange<3>& sr) { std::unique_lock lock(m_mutex); - assert((range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range) == range<3>(true, true, true)); + assert(all_true(range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range)); auto& existing_buf = m_buffers[bid].device_buf; backing_buffer replacement_buf; @@ -173,7 +173,7 @@ namespace detail { } buffer_manager::access_info buffer_manager::access_host_buffer_impl(const buffer_id bid, const access_mode mode, const subrange<3>& sr) { - assert((range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range) == range<3>(true, true, true)); + assert(all_true(range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range)); auto& existing_buf = m_buffers[bid].host_buf; backing_buffer replacement_buf; From 0a2e03757023c5a80ede546a8ffde8cc5bab3fec Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 24 Jun 2023 20:35:07 +0200 Subject: [PATCH 03/17] Re-implement grid data structures with normalized regions --- CMakeLists.txt | 1 + include/grid.h | 333 ++++++++++++++++++ include/print_utils.h | 71 +++- src/grid.cc | 537 +++++++++++++++++++++++++++++ test/CMakeLists.txt | 14 +- test/grid_tests.cc | 783 ++++++++++++++++++++++++++++++++++++++++++ test/test_utils.h | 11 +- 7 files changed, 1732 insertions(+), 18 deletions(-) create mode 100644 src/grid.cc create mode 100644 test/grid_tests.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 488f9ce0f..ebf9da7df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,6 +191,7 @@ set(SOURCES 
src/executor.cc src/distributed_graph_generator.cc src/graph_serializer.cc + src/grid.cc src/print_graph.cc src/print_utils.cc src/recorders.cc diff --git a/include/grid.h b/include/grid.h index 1802ccb0d..90130c5ee 100644 --- a/include/grid.h +++ b/include/grid.h @@ -1,7 +1,13 @@ #pragma once +#include "workaround.h" +#include +#include +#include + #include #include +#include #undef assert_fail // Incompatible with fmt #include "ranges.h" @@ -50,5 +56,332 @@ namespace detail { inline subrange<2> grid_box_to_subrange(const GridBox<2>& box) { return impl::grid_box_to_subrange<2>(box); } inline subrange<3> grid_box_to_subrange(const GridBox<3>& box) { return impl::grid_box_to_subrange<3>(box); } + template + class box; + + template + class region; + } // namespace detail } // namespace celerity + +namespace celerity::detail::grid_detail { + +struct normalized_t { +} inline constexpr normalized; + +struct non_empty_t { +} inline constexpr non_empty; + +template +box make_box(Params&&... args) { + return box(std::forward(args)...); +} + +template +region make_region(Params&&... args) { + return region(std::forward(args)...); +} + +template +int get_min_dimensions(const InputIterator first, const InputIterator last) { + return std::accumulate(first, last, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_min_dimensions()); }); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +/// An arbitrary-dimensional box described by its minimum and maximum points. +template +class box /* class instead of struct: enforces min <= max invariant */ { + public: + static_assert(Dims >= 0); + static constexpr int dimensions = Dims; + + /// Construct an empty box for Dims > 0, and a unit-sized box for Dims == 0 + box() = default; + + /// Construct a box from two points where `min` must be less or equal to `max` in every dimension. 
+ /// Empty boxes are normalized to [0,0,0] - [0,0,0], meaning that every box-shaped set of points has a unique representation. + box(const id& min, const id& max) { + bool non_empty = true; + for(int d = 0; d < Dims; ++d) { + // Ideally all coordinates would be signed types, but since id and range must be unsigned to conform with SYCL, we trap size_t overflows and + // incorrect casts from negative integers in user code in this assertion. + CELERITY_DETAIL_ASSERT_ON_HOST(std::max(min[d], max[d]) < std::numeric_limits::max() / 2 && "potential integer overflow detected"); + // Precondition: + CELERITY_DETAIL_ASSERT_ON_HOST(min[d] <= max[d]); + non_empty &= min[d] < max[d]; + } + m_min = non_empty ? min : id{}; + m_max = non_empty ? max : id{}; + } + + box(const subrange& other) : box(other.offset, other.offset + other.range) { +#if CELERITY_DETAIL_ENABLE_DEBUG + for(int d = 0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.range[d] < std::numeric_limits::max() - other.offset[d]); + } +#endif + } + + bool empty() const { + if constexpr(Dims > 0) { + return m_max[0] == 0; // empty boxes are normalized to [0,0,0] - [0,0,0] + } else { + return false; // edge case: min == max, but 0-dimensional boxes are always size 1 + } + } + + const id& get_min() const { return m_min; } + const id& get_max() const { return m_max; } + + const id& get_offset() const { return m_min; } + range get_range() const { return range_cast(m_max - m_min); } + subrange get_subrange() const { return {get_offset(), get_range()}; } + operator subrange() const { return get_subrange(); } + + /// Counts the number of points covered by the region. + size_t get_area() const { return get_range().size(); } + + /// Returns the smallest dimensionality that `*this` can be `box_cast` to. 
+ int get_min_dimensions() const { + if(empty()) return 1; // edge case: a 0-dimensional box is always non-empty + for(int dims = Dims; dims > 0; --dims) { + if(m_max[dims - 1] > 1) { return dims; } + } + return 0; + } + + bool covers(const box& other) const { + // empty boxes are normalized and thus may not intersect in coordinates + if(other.empty()) return true; + + for(int d = 0; d < Dims; ++d) { + if(other.m_min[d] < m_min[d]) return false; + if(other.m_max[d] > m_max[d]) return false; + } + return true; + } + + friend bool operator==(const box& lhs, const box& rhs) { return lhs.m_min == rhs.m_min && lhs.m_max == rhs.m_max; } + friend bool operator!=(const box& lhs, const box& rhs) { return !operator==(lhs, rhs); } + + private: + template + friend box grid_detail::make_box(P&&... args); + + id m_min; + id m_max; + + // fast code path for grid algorithms that does not attempt to normalize empty boxes + box(grid_detail::non_empty_t /* tag */, const id& min, const id& max) : m_min(min), m_max(max) { +#if CELERITY_DETAIL_ENABLE_DEBUG + for(int d = 0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(min[d] < max[d]); + } +#endif + } +}; + +/// Boxes can be cast between dimensionalities as long as no information is lost (i.e. a cast to a higher dimensionality is always round-trip safe). 
+template +box box_cast(const box& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= DimsOut); + return box(subrange_cast(in.get_subrange())); // cast through subrange to fill missing range dimensions with 1s +} + +template +box bounding_box(const box& box1, const box& box2) { + // empty boxes are covered by any other box, but their normalized coordinates should not contribute to the bounding box + if(box1.empty()) return box2; + if(box2.empty()) return box1; + + const auto min = id_min(box1.get_min(), box2.get_min()); + const auto max = id_max(box1.get_max(), box2.get_max()); + return box(min, max); +} + +template +auto bounding_box(InputIterator first, const InputIterator last) { + using box_type = typename std::iterator_traits::value_type; + if(first == last) { + assert(box_type::dimensions > 0); // box<0> can never be empty + return box_type(); + } + + const auto init = *first; + return std::accumulate(++first, last, init, bounding_box); +} + +template +auto bounding_box(const Range& range) { + using std::begin, std::end; + return bounding_box(begin(range), end(range)); +} + +template +box box_intersection(const box& box1, const box& box2) { + const auto min = id_max(box1.get_min(), box2.get_min()); + const auto max = id_min(box1.get_max(), box2.get_max()); + for(int d = 0; d < Dims; ++d) { + if(min[d] >= max[d]) return {}; + } + return {min, max}; +} + +/// Comparison operator (similar to std::less) that orders boxes by their minimum, then their maximum, both starting with the first ("slowest") dimension. 
+/// This ordering is somewhat arbitrary but allows equality comparisons between ordered sequences of boxes (i.e., regions) +struct box_coordinate_order { + template + bool operator()(const box& lhs, const box& rhs) const { + for(int d = 0; d < Dims; ++d) { + if(lhs.get_min()[d] < rhs.get_min()[d]) return true; + if(lhs.get_min()[d] > rhs.get_min()[d]) return false; + } + for(int d = 0; d < Dims; ++d) { + if(lhs.get_max()[d] < rhs.get_max()[d]) return true; + if(lhs.get_max()[d] > rhs.get_max()[d]) return false; + } + return false; + } +}; + +/// An arbitrary-dimensional set of points described by a normalized tiling of boxes. +template +class region { + public: + constexpr static int dimensions = Dims; + using box = detail::box; + + region() = default; + region(const box& single_box); + region(const subrange& single_sr); + + /// Constructs a region by normalizing an arbitrary, potentially-overlapping tiling of boxes. + explicit region(std::vector&& boxes); + + const std::vector& get_boxes() const& { return m_boxes; } + + std::vector into_boxes() && { return std::move(m_boxes); } + + bool empty() const { return m_boxes.empty(); } + + /// Counts the number of points covered by the region. + size_t get_area() const { + return std::accumulate(m_boxes.begin(), m_boxes.end(), size_t{0}, [](const size_t area, const box& box) { return area + box.get_area(); }); + } + + /// Returns the smallest dimensionality that `*this` can be `region_cast` to. + int get_min_dimensions() const { return grid_detail::get_min_dimensions(m_boxes.begin(), m_boxes.end()); } + + friend bool operator==(const region& lhs, const region& rhs) { return lhs.m_boxes == rhs.m_boxes; } + friend bool operator!=(const region& lhs, const region& rhs) { return !(lhs == rhs); } + + private: + template + friend region grid_detail::make_region(P&&... 
args); + + std::vector m_boxes; + + region(grid_detail::normalized_t, std::vector&& boxes); +}; + +} // namespace celerity::detail + +namespace celerity::detail::grid_detail { + +template +std::vector> boxes_cast(const std::vector>& in) { + assert(get_min_dimensions(in.begin(), in.end()) <= DimsOut); + std::vector> out(in.size()); + std::transform(in.begin(), in.end(), out.begin(), [](const box& box) { return box_cast(box); }); + return out; +} + +// forward-declaration for tests (explicitly instantiated) +template +void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); + +// forward-declaration for tests (explicitly instantiated) +template +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last); + +// forward-declaration for tests (explicitly instantiated) +template +void normalize(std::vector>& boxes); + +// rvalue shortcut for normalize(lvalue) +template +std::vector>&& normalize(std::vector>&& boxes) { + normalize(boxes); + return std::move(boxes); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +template +region region_cast(const region& in) { + assert(in.get_min_dimensions() <= DimsOut); + // a normalized region will remain normalized after the cast + return grid_detail::make_region(grid_detail::normalized, grid_detail::boxes_cast(in.get_boxes())); +} + +template +box bounding_box(const region& region) { + return bounding_box(region.get_boxes().begin(), region.get_boxes().end()); +} + +template +region region_union(const region& lhs, const region& rhs); + +template +region region_union(const region& lhs, const box& rhs) { + return region_union(lhs, region(rhs)); +} + +template +region region_union(const box& lhs, const region& rhs) { + return region_union(region(lhs), rhs); +} + +template +region region_union(const box& lhs, const box& rhs) { + return region(std::vector{lhs, rhs}); +} + +template +region 
region_intersection(const region& lhs, const region& rhs); + +template +region region_intersection(const region& lhs, const box& rhs) { + return region_intersection(lhs, region(rhs)); +} + +template +region region_intersection(const box& lhs, const region& rhs) { + return region_intersection(region(lhs), rhs); +} + +template +region region_difference(const region& lhs, const region& rhs); + +template +region region_difference(const region& lhs, const box& rhs) { + return region_difference(lhs, region(rhs)); +} + +template +region region_difference(const box& lhs, const region& rhs) { + return region_difference(region(lhs), rhs); +} + +template +region region_difference(const box& lhs, const box& rhs) { + return region_difference(region(lhs), region(rhs)); +} + +} // namespace celerity::detail diff --git a/include/print_utils.h b/include/print_utils.h index a25a2c564..7e4c6cf9b 100644 --- a/include/print_utils.h +++ b/include/print_utils.h @@ -1,22 +1,69 @@ #pragma once +#include "grid.h" #include "ranges.h" -namespace celerity { +#include -namespace detail { - std::ostream& print_chunk3(std::ostream& os, chunk<3> chnk3); - std::ostream& print_subrange3(std::ostream& os, subrange<3> subr3); -} // namespace detail +template +struct fmt::formatter> : fmt::formatter { + format_context::iterator format(const Interface& coord, format_context& ctx) const { + auto out = ctx.out(); + *out++ = '['; + for(int d = 0; d < Dims; ++d) { + if(d != 0) *out++ = ','; + out = formatter::format(coord[d], ctx); + } + *out++ = ']'; + return out; + } +}; template -std::ostream& operator<<(std::ostream& os, chunk chnk) { - return detail::print_chunk3(os, detail::chunk_cast<3>(chnk)); -} +struct fmt::formatter> : fmt::formatter, Dims>> {}; template -std::ostream& operator<<(std::ostream& os, subrange subr) { - return detail::print_subrange3(os, detail::subrange_cast<3>(subr)); -} +struct fmt::formatter> : fmt::formatter, Dims>> {}; -} // namespace celerity +template +struct fmt::formatter> 
: fmt::formatter> { + format_context::iterator format(const celerity::detail::box& box, format_context& ctx) const { + auto out = ctx.out(); + out = formatter>::format(box.get_min(), ctx); + out = std::copy_n(" - ", 3, out); + out = formatter>::format(box.get_max(), ctx); + return out; + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::detail::region& region, format_context& ctx) const { + auto out = ctx.out(); + *out++ = '{'; + for(size_t i = 0; i < region.get_boxes().size(); ++i) { + if(i != 0) out = std::copy_n(", ", 2, out); + out = formatter>::format(region.get_boxes()[i], ctx); + } + *out++ = '}'; + return out; + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::subrange& sr, format_context& ctx) const { + return fmt::formatter>::format(celerity::detail::box(sr), ctx); + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::chunk& chunk, format_context& ctx) const { + auto out = ctx.out(); + out = fmt::formatter>::format(celerity::subrange(chunk.offset, chunk.range), ctx); + out = std::copy_n(" : ", 3, out); + out = formatter>::format(celerity::id(chunk.global_size), ctx); // cast to id to avoid multiple inheritance + return out; + } +}; diff --git a/src/grid.cc b/src/grid.cc new file mode 100644 index 000000000..2a9fa645e --- /dev/null +++ b/src/grid.cc @@ -0,0 +1,537 @@ +#include "grid.h" + +namespace celerity::detail::grid_detail { + +// Regions have a storage dimensionality (the `Dims` template parameter of `class region`) and an effective dimensionality that is smaller iff all contained +// boxes are effectively the result of casting e.g. box<2> to box<3>, or the described region "accidentally" is a lower-dimensional slice of the full space. 
+// This property is detected at runtime through {box,region}::get_min_dimensions(), and all region-algorithm implementations are generic over both StorageDims +// and EffectiveDims to optimize for the embedding of arbitrary-dimensional regions into region<3> as it commonly happens in the runtime. + +// 2-connectivity for 1d boxes, 4-connectivity for 2d boxes and 6-connectivity for 3d boxes. +template +bool boxes_connected(const box& box1, const box& box2) { + static_assert(EffectiveDims <= StorageDims); + + if(box1.empty() || box2.empty()) return false; + + bool touching = false; + for(int d = 0; d < EffectiveDims; ++d) { + const auto min = std::max(box1.get_min()[d], box2.get_min()[d]); + const auto max = std::min(box1.get_max()[d], box2.get_max()[d]); + if(min > max) return false; // fully disconnected, even across corners + if(min == max) { + // when boxes are touching (but not intersecting) in more than one dimension, they can only be connected via corners + if(touching) return false; + touching = true; + } + } + return true; +} + +// Like detail::box_intersection, but aware of effective dimensionality +template +box box_intersection(const box& box1, const box& box2) { + static_assert(EffectiveDims <= StorageDims); + + id min; + id max; + for(int d = 0; d < EffectiveDims; ++d) { + min[d] = std::max(box1.get_min()[d], box2.get_min()[d]); + max[d] = std::min(box1.get_max()[d], box2.get_max()[d]); + if(min[d] >= max[d]) return {}; + } + for(int d = EffectiveDims; d < StorageDims; ++d) { + min[d] = 0; + max[d] = 1; + } + return make_box(non_empty, min, max); +} + +// Like box::covers, but aware of effective dimensionality +template +bool box_covers(const box& top, const box& bottom) { + static_assert(EffectiveDims <= StorageDims); + + // empty boxes are normalized and thus may not intersect in coordinates + if(bottom.empty()) return true; + + for(int d = 0; d < EffectiveDims; ++d) { + if(bottom.get_min()[d] < top.get_min()[d]) return false; +
if(bottom.get_max()[d] > top.get_max()[d]) return false; + } + return true; +} + +// O(N^2) remove any box A != B for which box_covers(B, A) is true +template +BidirectionalIterator remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last) { + for(auto top = first; top != last; ++top) { + top_replaced: + for(auto bottom = std::next(top); bottom != last;) { + if(box_covers(*top, *bottom)) { + *bottom = *--last; + } else if(box_covers(*bottom, *top)) { + *top = *bottom; + *bottom = *--last; + goto top_replaced; // NOLINT(cppcoreguidelines-avoid-goto) + } else { + ++bottom; + } + } + } + return last; +} + +// Partition a range of boxes into intervals described by a grid of dissection lines, and invoke a user function on each partition. +template +void for_each_dissection_interval(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts, Fn&& f, int dim = 0) { + using box_type = typename std::iterator_traits::value_type; + + assert(first != last); + + if(cuts.size() <= static_cast(dim)) { + // We are past the last dissected dimension, so the interval is just our entire input range + f(first, last); + return; + } + + // Since boxes can never cross a dissection line, we can partition the range into dissection intervals by sorting along one dimension + std::sort(first, last, [dim](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[dim] < rhs.get_min()[dim]; }); + + auto next_cut = cuts[dim].begin(); + while(first != last) { + // The current box `first` always belongs to our interval. 
Now find, in O(log N), the dissection line that marks the end of this interval + next_cut = std::upper_bound(next_cut, cuts[dim].end(), first->get_min()[dim]); + assert(next_cut != cuts[dim].end()); + + // Find, in O(log N), the end iterator of our interval by searching the first item that is "right" of the dissection line + const auto next = std::lower_bound(first, last, *next_cut, [dim](const box_type& lhs, const size_t cut) { return lhs.get_min()[dim] < cut; }); + + // Recurse into the found interval along the next (faster) dimension + for_each_dissection_interval(first, next, cuts, f, dim + 1); + + first = next; + } +} + +// Like remove_pairwise_covered(first, last), but at lower average complexity for a range of boxes that are dissected according to `cuts`. +template +BidirectionalIterator remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts) { + using box_type [[maybe_unused]] = typename std::iterator_traits::value_type; + + assert(cuts.size() <= EffectiveDims); + assert(std::all_of(cuts.begin(), cuts.end(), [](const std::vector& dim_cuts) { return std::is_sorted(dim_cuts.begin(), dim_cuts.end()); })); + + if(first == last || std::next(first) == last) return last; + + // We compact the range in-place after each removal by left-shifting each de-duplicated range + auto last_out = first; + + for_each_dissection_interval(first, last, cuts, [&](const BidirectionalIterator i_first, const BidirectionalIterator i_last) { + // Delegate the interval to the O(N^2) overload of remove_pairwise_covered + const auto last_retained = remove_pairwise_covered(i_first, i_last); + // for_each_dissection_interval will not touch [first, i_last) after this iteration + last_out = std::move(i_first, last_retained, last_out); + }); + + return last_out; +} + +// In a range of boxes that are identical in all dimensions except MergeDim, merge all connected boxes ("unconditional directional merge") +template +BidirectionalIterator 
merge_connected_intervals(BidirectionalIterator first, BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + + if(first == last || std::next(first) == last) return last; // common-case shortcut: no merge is possible + + // Sort by interval starting point + std::sort(first, last, [](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[MergeDim] < rhs.get_min()[MergeDim]; }); + + // The range is both read and written from left-to-right, avoiding repeated left-shifts for compaction + auto last_out = first; + + // Merge all connected boxes along MergeDim in O(N) by replacing each connected sequence with its bounding box + while(first != last) { + const auto merged_min = first->get_min(); + auto merged_max = first->get_max(); + for(++first; first != last && first->get_min()[MergeDim] <= merged_max[MergeDim]; ++first) { + merged_max[MergeDim] = std::max(merged_max[MergeDim], first->get_max()[MergeDim]); + } + *last_out++ = make_box(grid_detail::non_empty, merged_min, merged_max); + } + + return last_out; +} + +// In an arbitrary range of boxes, merge all boxes that are identical in all dimensions except MergeDim ("conditional directional merge"). 
+template +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + static_assert(EffectiveDims <= box_type::dimensions); + static_assert(MergeDim < EffectiveDims); + + constexpr auto orthogonal_to_merge_dim = [](const box_type& lhs, const box_type& rhs) { + for(int d = 0; d < EffectiveDims; ++d) { + if(d == MergeDim) continue; + // arbitrary but consistent ordering along all orthogonal dimensions + if(lhs.get_min()[d] < rhs.get_min()[d]) return true; + if(lhs.get_min()[d] > rhs.get_min()[d]) return false; + if(lhs.get_max()[d] < rhs.get_max()[d]) return true; + if(lhs.get_max()[d] > rhs.get_max()[d]) return false; + } + return false; + }; + + if constexpr(EffectiveDims == 1) { + return merge_connected_intervals(first, last); + } else { + // partition [first, last) into sequences of boxes that are potentially mergeable wrt/ the dimensions orthogonal to MergeDim. + // This reduces complexity from O(n^3) to O(n log n) + O(m^3), where m is the longest mergeable sequence in that regard. + std::sort(first, last, orthogonal_to_merge_dim); + + // we want the result to be contiguous in [first, last_out), so in each iteration, we merge all boxes of a MergeDim-equal partition at their original + // position in the iterator range; and then shift the merged range back to fill any gap left by merge of a previous partition. 
+ auto last_out = first; + + for(auto first_equal = first; first_equal != last;) { + // O(n) std::find_if could be replaced by O(log n) std::partition_point, but we expect the number of "equal" elements to be small + const auto last_equal = std::find_if(std::next(first_equal), last, [&](const box_type& box) { + return orthogonal_to_merge_dim(*first_equal, box); // true if box is in a partition _after_ *first_equal + }); + const auto last_merged = merge_connected_intervals(first_equal, last_equal); + // shift the newly merged boxes to the left to close any gap opened by the merge of a previous partition + last_out = std::move(first_equal, last_merged, last_out); + first_equal = last_equal; + } + + return last_out; + } +} + +// explicit instantiations for tests (might otherwise be inlined) +template std::vector>::iterator merge_connected_boxes_along_dim<0, 1>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<0, 2>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<1, 2>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<0, 3>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<1, 3>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<2, 3>(std::vector>::iterator first, std::vector>::iterator last); + +// For higher-dimensional regions, the order in which dimensions are merged is relevant for the shape of the resulting box set. We merge along the last +// ("fastest") dimension first to make sure the resulting boxes cover the largest possible extent of contiguous memory when are applied to buffers. 
+template +BidirectionalIterator merge_connected_boxes_recurse(const BidirectionalIterator first, BidirectionalIterator last) { + static_assert(MergeDim >= 0 && MergeDim < EffectiveDims); + last = merge_connected_boxes_along_dim(first, last); + if constexpr(MergeDim > 0) { last = merge_connected_boxes_recurse(first, last); } + return last; +} + +// Merge all adjacent boxes that are connected and identical in all except a single dimension. +template +BidirectionalIterator merge_connected_boxes(const BidirectionalIterator first, BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + static_assert(EffectiveDims <= box_type::dimensions); + if constexpr(EffectiveDims > 0) { last = merge_connected_boxes_recurse(first, last); } + return last; +} + +// Split a box into parts according to dissection lines in `cuts`, where `cuts` is indexed by component dimension. This function is not generic +// over EffectiveDims, rather, `cuts` will have 1 <= n <= StorageDims entries to indicate along how many dimensions the box should be dissected. 
+template +void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim) { + assert(dim < static_cast(cuts.size())); + + const auto& dim_cuts = cuts[static_cast(dim)]; + assert(std::is_sorted(dim_cuts.begin(), dim_cuts.end())); + + // start of the first (current) dissected box + size_t start = in_box.get_min()[dim]; + // find the first cut that lies inside the box (dim_cuts is sorted) + auto cut_it = std::lower_bound(dim_cuts.begin(), dim_cuts.end(), /* not less or equal */ start + 1); + + for(;;) { + // the end of the current box is either the last cut that lies inside the box, or the end of in_box + size_t end; + if(cut_it != dim_cuts.end() && *cut_it < in_box.get_max()[dim]) { + end = *cut_it++; + } else { + end = in_box.get_max()[dim]; + } + if(end == start) break; + + // compute coordinates for the dissected box along `dim`, and recursively dissect it further along `dim + 1` + auto min = in_box.get_min(); + auto max = in_box.get_max(); + min[dim] = start; + max[dim] = end; + const auto small_box = make_box(grid_detail::non_empty, min, max); + if(dim + 1 < static_cast(cuts.size())) { + dissect_box(small_box, cuts, out_dissected, dim + 1); + } else { + out_dissected.push_back(small_box); + } + + start = end; + } +} + +// explicit instantiations for tests (might otherwise be inlined) +template void dissect_box(const box<2>& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); +template void dissect_box(const box<3>& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); + +// Apply dissect_box to all boxes in a range, with a shortcut if no cuts are to be done. 
+template +void dissect_boxes(const InputIterator first, const InputIterator last, const std::vector>& cuts, + std::vector::value_type>& out_dissected) { + if(!cuts.empty()) { + for(auto it = first; it != last; ++it) { + dissect_box(*it, cuts, out_dissected, 0); + } + } else { + out_dissected.insert(out_dissected.end(), first, last); + } +} + +// Collect the sorted, unique list of box start- and end points along a single dimension. These can then be used in dissect_boxes. +template +std::vector collect_dissection_lines(const InputIterator first, const InputIterator last, int dim) { + std::vector cuts; + // allocating 2*N integers might seem wasteful, but this has negligible runtime in the profiler and is already algorithmically optimal at O(N log N) + cuts.reserve(std::distance(first, last) * 2); + for(auto it = first; it != last; ++it) { + cuts.push_back(it->get_min()[dim]); + cuts.push_back(it->get_max()[dim]); + } + std::sort(cuts.begin(), cuts.end()); + cuts.erase(std::unique(cuts.begin(), cuts.end()), cuts.end()); + assert(first == last || cuts.size() >= 2); + return cuts; +} + +template +void normalize_impl(std::vector>& boxes) { + static_assert(EffectiveDims <= StorageDims); + assert(!boxes.empty()); + + if constexpr(EffectiveDims == 0) { + // all 0d boxes are identical + boxes.resize(1); + } else if constexpr(EffectiveDims == 1) { + // merge_connected_boxes will sort and merge - this is already the complete 1d normalization + boxes.erase(merge_connected_boxes(boxes.begin(), boxes.end()), boxes.end()); + assert(!boxes.empty()); + assert(std::is_sorted(boxes.begin(), boxes.end(), box_coordinate_order())); + } else { + // 0. (hopefully) fast path: attempt to merge without dissecting first + boxes.erase(merge_connected_boxes(boxes.begin(), boxes.end()), boxes.end()); + assert(!boxes.empty()); + if(boxes.size() == 1) return; + + // 1. 
dissect boxes along the edges of all other boxes (except the last, "fastest" dim) to create the "maximally mergeable set" of small boxes for step + std::vector> cuts(EffectiveDims - 1); + for(int d = 0; d < EffectiveDims - 1; ++d) { + cuts[static_cast(d)] = collect_dissection_lines(boxes.begin(), boxes.end(), d); + } + + std::vector> disjoint_boxes; + dissect_boxes(boxes.begin(), boxes.end(), cuts, disjoint_boxes); + boxes = std::move(disjoint_boxes); + + // 2. remove all overlap by removing pairwise coverings + const auto first = boxes.begin(); + auto last = boxes.end(); + last = remove_pairwise_covered(first, last, cuts); + + // 3. merge the overlap-free tiling of boxes where possible + last = merge_connected_boxes(first, last); + boxes.erase(last, boxes.end()); + + // 4. normalize box order + std::sort(boxes.begin(), boxes.end(), box_coordinate_order()); + } +} + +// Use together with a generic functor to dispatch the EffectiveDims template parameter at runtime +template +decltype(auto) dispatch_effective_dims(int effective_dims, F&& f) { + assert(effective_dims <= StorageDims); + + // clang-format off + switch(effective_dims) { + case 0: if constexpr(StorageDims >= 0) { return f(std::integral_constant()); } [[fallthrough]]; + case 1: if constexpr(StorageDims >= 1) { return f(std::integral_constant()); } [[fallthrough]]; + case 2: if constexpr(StorageDims >= 2) { return f(std::integral_constant()); } [[fallthrough]]; + case 3: if constexpr(StorageDims >= 3) { return f(std::integral_constant()); } [[fallthrough]]; + default: abort(); // unreachable with the explicit instantiations in this file + } + // clang-format on +} + +// For any set of boxes, find the unique box tiling that covers the same points and is subject to the following constraints: +// 1. the extent of every box is maximized along the last dimension, then along the second-to-last dimension, and so forth. +// 2. no two boxes within the tiling intersect (i.e. cover a common point). +// 3. 
the tiling contains no empty boxes. +// 4. the normalized sequence is sorted according to box_coordinate_order. +// There is exactly one sequence of boxes for any set of points that fulfills 1-4, meaning that an "==" comparison of normalized tilings would be equivalent +// to an equality comparision of the covered point sets. +template +void normalize(std::vector>& boxes) { + boxes.erase(std::remove_if(boxes.begin(), boxes.end(), std::mem_fn(&box::empty)), boxes.end()); + if(boxes.size() <= 1) return; + + const auto effective_dims = get_min_dimensions(boxes.begin(), boxes.end()); + assert(effective_dims <= Dims); + + dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // + normalize_impl(boxes); + }); +} + +// explicit instantiations for tests (might otherwise be inlined into region::region) +template void normalize(std::vector>& boxes); +template void normalize(std::vector>& boxes); +template void normalize(std::vector>& boxes); +template void normalize(std::vector>& boxes); + +template +region region_intersection_impl(const region& lhs, const region& rhs) { + static_assert(EffectiveDims <= StorageDims); + + // O(N * M). This can probably be improved for large inputs by dissecting either lhs or rhs by the lines of the other and then performing an interval + // search similar to how remove_pairwise_covered operates. + std::vector> intersection; + for(const auto& left : lhs.get_boxes()) { + for(const auto& right : rhs.get_boxes()) { + if(const auto box = grid_detail::box_intersection(left, right); !box.empty()) { intersection.push_back(box); } + } + } + + // No dissection step is necessary as the intersection of two normalized tilings is already "maximally mergeable". 
+ const auto first = intersection.begin(); + auto last = intersection.end(); + last = grid_detail::merge_connected_boxes(first, last); + + // intersected_boxes retains the sorting from lhs, but for Dims > 1, the intersection can shift min-points such that the box_coordinate_order reverses. + if constexpr(EffectiveDims > 1) { + std::sort(first, last, box_coordinate_order()); + } else { + assert(std::is_sorted(first, last, box_coordinate_order())); + } + + intersection.erase(last, intersection.end()); + return grid_detail::make_region(grid_detail::normalized, std::move(intersection)); +} + +// Complete the region_difference operation with an already dissected left-hand side and knowledge of effective dimensionality. +template +void apply_region_difference(std::vector>& dissected_left, const region& rhs) { + static_assert(EffectiveDims <= StorageDims); + + // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs + const auto first_left = dissected_left.begin(); + auto last_left = dissected_left.end(); + for(const auto& right : rhs.get_boxes()) { + for(auto left_it = first_left; left_it != last_left;) { + if(grid_detail::box_covers(right, *left_it)) { + *left_it = *--last_left; + } else { + ++left_it; + } + } + } + + // merge the now non-overlapping boxes + last_left = grid_detail::merge_connected_boxes(first_left, last_left); + dissected_left.erase(last_left, dissected_left.end()); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +template +region::region(const box& single_box) : region(std::vector{single_box}) {} // still need to normalize in case single_box is empty + +template +region::region(const subrange& single_sr) : region(box(single_sr)) {} + +template +region::region(std::vector&& boxes) : region(grid_detail::normalized, (/* in-place */ grid_detail::normalize(boxes), /* then */ std::move(boxes))) {} + +template +region::region(grid_detail::normalized_t /* tag */, std::vector&& boxes) : 
m_boxes(std::move(boxes)) {} + +template class region<0>; +template class region<1>; +template class region<2>; +template class region<3>; + +template +region region_union(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty()) return rhs; + if(rhs.empty()) return lhs; + + std::vector> box_union; + box_union.reserve(lhs.get_boxes().size() + rhs.get_boxes().size()); + box_union.insert(box_union.end(), lhs.get_boxes().begin(), lhs.get_boxes().end()); + box_union.insert(box_union.end(), rhs.get_boxes().begin(), rhs.get_boxes().end()); + return region(std::move(box_union)); +} + +template region<0> region_union(const region<0>& lhs, const region<0>& rhs); +template region<1> region_union(const region<1>& lhs, const region<1>& rhs); +template region<2> region_union(const region<2>& lhs, const region<2>& rhs); +template region<3> region_union(const region<3>& lhs, const region<3>& rhs); + +template +region region_intersection(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty() || rhs.empty()) return {}; + + const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + return grid_detail::dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // + return grid_detail::region_intersection_impl(lhs, rhs); + }); +} + +template region<0> region_intersection(const region<0>& lhs, const region<0>& rhs); +template region<1> region_intersection(const region<1>& lhs, const region<1>& rhs); +template region<2> region_intersection(const region<2>& lhs, const region<2>& rhs); +template region<3> region_intersection(const region<3>& lhs, const region<3>& rhs); + +template +region region_difference(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty()) return {}; + if(rhs.empty()) return lhs; + + // the resulting effective_dims can never be greater than the lhs dimension, but the difference operator must still operate on 
all available dimensions + // to correctly identify overlapping boxes + const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + assert(effective_dims <= Dims); + + // 1. collect dissection lines (in *all* dimensions) from rhs + std::vector> cuts(effective_dims); + for(int d = 0; d < effective_dims; ++d) { + cuts[static_cast(d)] = grid_detail::collect_dissection_lines(rhs.get_boxes().begin(), rhs.get_boxes().end(), d); + } + + // 2. dissect lhs according to the lines of rhs, so that any overlap between lhs and rhs is turned into an lhs box fully covered by an rhs box + std::vector> dissected_left; + grid_detail::dissect_boxes(lhs.get_boxes().begin(), lhs.get_boxes().end(), cuts, dissected_left); + + grid_detail::dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // + grid_detail::apply_region_difference(dissected_left, rhs); + }); + std::sort(dissected_left.begin(), dissected_left.end(), box_coordinate_order()); + + return grid_detail::make_region(grid_detail::normalized, std::move(dissected_left)); +} + +template region<0> region_difference(const region<0>& lhs, const region<0>& rhs); +template region<1> region_difference(const region<1>& lhs, const region<1>& rhs); +template region<2> region_difference(const region<2>& lhs, const region<2>& rhs); +template region<3> region_difference(const region<3>& lhs, const region<3>& rhs); + +} // namespace celerity::detail \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f2c3ba7a6..d4774ca2c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_TARGETS graph_gen_reduction_tests graph_gen_transfer_tests graph_compaction_tests + grid_tests intrusive_graph_tests print_graph_tests region_map_tests @@ -69,6 +70,8 @@ foreach(TEST_TARGET ${TEST_TARGETS}) ParseAndAddCatchTests_ParseFile(${TEST_SOURCE} ${TEST_TARGET}) endforeach() +target_sources(grid_tests PRIVATE benchmark_reporters.cc) # TODO HACK + # 
Add all_tests executable add_executable(all_tests ${TEST_OBJ_LIST}) target_link_libraries(all_tests PRIVATE test_main) @@ -96,8 +99,11 @@ find_library(CAIRO_LIBRARIES ) if(CAIRO_INCLUDE_DIRS AND CAIRO_LIBRARIES) - target_compile_definitions(region_map_tests_OBJ PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) - target_include_directories(region_map_tests_OBJ PRIVATE ${CAIRO_INCLUDE_DIRS}) - target_link_libraries(region_map_tests PRIVATE ${CAIRO_LIBRARIES}) + message(STATUS "Building tests with cairo support: ${CAIRO_LIBRARIES}") + foreach(TEST_TARGET region_map_tests;grid_tests) + target_compile_definitions(${TEST_TARGET}_OBJ PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) + target_include_directories(${TEST_TARGET}_OBJ PRIVATE ${CAIRO_INCLUDE_DIRS}) + target_link_libraries(${TEST_TARGET} PRIVATE ${CAIRO_LIBRARIES}) + endforeach() target_link_libraries(all_tests PRIVATE ${CAIRO_LIBRARIES}) -endif() \ No newline at end of file +endif() diff --git a/test/grid_tests.cc b/test/grid_tests.cc new file mode 100644 index 000000000..00f6749a7 --- /dev/null +++ b/test/grid_tests.cc @@ -0,0 +1,783 @@ +#include "grid.h" +#include "test_utils.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#if CELERITY_DETAIL_HAVE_CAIRO +#include +#endif + +using namespace celerity; +using namespace celerity::detail; + +// forward declarations for functions not exposed in grid.h +namespace celerity::detail::grid_detail { + +} // namespace celerity::detail::grid_detail + +struct partition_vector_order { + template + bool operator()(const std::vector>& lhs, const std::vector>& rhs) { + if(lhs.size() < rhs.size()) return true; + if(lhs.size() > rhs.size()) return false; + constexpr box_coordinate_order box_order; + for(size_t i = 0; i < lhs.size(); ++i) { + if(box_order(lhs[i], rhs[i])) return true; + if(box_order(rhs[i], lhs[i])) return false; + } + return false; + } +}; + +// input: h as an angle in [0,360] and s,l in [0,1] - output: r,g,b in [0,1] +std::array hsl2rgb(const 
float h, const float s, const float l) { + constexpr auto hue2rgb = [](const float p, const float q, float t) { + if(t < 0) t += 1; + if(t > 1) t -= 1; + if(t < 1.f / 6) return p + (q - p) * 6 * t; + if(t < 1.f / 2) return q; + if(t < 2.f / 3) return p + (q - p) * (2.f / 3 - t) * 6; + return p; + }; + + if(s == 0) return {l, l, l}; // achromatic + + const auto q = l < 0.5 ? l * (1 + s) : l + s - l * s; + const auto p = 2 * l - q; + const auto r = hue2rgb(p, q, h + 1.f / 3); + const auto g = hue2rgb(p, q, h); + const auto b = hue2rgb(p, q, h - 1.f / 3); + return {r, g, b}; +} + +void render_boxes(const std::vector>& boxes, const std::string_view suffix = "region") { +#if CELERITY_DETAIL_HAVE_CAIRO + const auto env = std::getenv("CELERITY_RENDER_REGIONS"); + if(env == nullptr || env[0] == 0) return; + + constexpr int ruler_width = 30; + constexpr int ruler_space = 4; + constexpr int text_margin = 2; + constexpr int border_start = ruler_width + ruler_space; + constexpr int cell_size = 20; + constexpr int border_end = 30; + constexpr int inset = 1; + + const auto bounds = bounding_box(boxes); + const auto canvas_width = border_start + static_cast(bounds.get_max()[1]) * cell_size + border_end; + const auto canvas_height = border_start + static_cast(bounds.get_max()[0]) * cell_size + border_end; + + cairo_surface_t* surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, canvas_width, canvas_height); + cairo_t* cr = cairo_create(surface); + + cairo_select_font_face(cr, "sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); + cairo_set_font_size(cr, 12); + + cairo_set_source_rgb(cr, 0, 0, 0); + cairo_set_line_width(cr, 1); + for(int i = 0; i < static_cast(bounds.get_max()[1]) + 1; ++i) { + const auto x = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, static_cast(x) - 0.5f, text_margin); + cairo_line_to(cr, static_cast(x) - 0.5f, ruler_width); + cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + 
cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, x + text_margin, text_margin + te.height); + cairo_show_text(cr, label.c_str()); + } + for(int i = 0; i < static_cast(bounds.get_max()[0]) + 1; ++i) { + const auto y = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, text_margin, static_cast(y) - 0.5f); + cairo_line_to(cr, ruler_width, static_cast(y) - 0.5f); + cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, text_margin, y + te.height + text_margin); + cairo_show_text(cr, label.c_str()); + } + + cairo_set_operator(cr, CAIRO_OPERATOR_HSL_HUE); + for(size_t i = 0; i < boxes.size(); ++i) { + const auto hue = static_cast(i) / static_cast(boxes.size()); + const auto [r, g, b] = hsl2rgb(hue, 0.8f, 0.6f); + cairo_set_source_rgb(cr, r, g, b); + const auto sr = static_cast>(boxes[i]); + const auto x = border_start + 2 * inset + static_cast(sr.offset[1]) * cell_size; + const auto y = border_start + 2 * inset + static_cast(sr.offset[0]) * cell_size; + const auto w = static_cast(sr.range[1]) * cell_size - 2 * inset; + const auto h = static_cast(sr.range[0]) * cell_size - 2 * inset; + cairo_rectangle(cr, x, y, w, h); + cairo_fill(cr); + } + + cairo_set_source_rgb(cr, 1.0, 1.0, 1.0); + cairo_rectangle(cr, 0, 0, canvas_width, canvas_height); + cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER); + cairo_fill(cr); + + cairo_destroy(cr); + + const auto test_name = Catch::getResultCapture().getCurrentTestName(); + const auto image_name = fmt::format("{}-{}.png", std::regex_replace(test_name, std::regex("[^a-zA-Z0-9]+"), "-"), suffix); + cairo_surface_write_to_png(surface, image_name.c_str()); + cairo_surface_destroy(surface); +#else + (void)boxes; +#endif +} + + +TEST_CASE("split_box dissects boxes as expected - 3d", "[grid]") { + const box<3> input_box{{0, 0, 0}, {7, 9, 5}}; + const std::vector> cuts{ + {0, 4, 8, 12}, + {8, 9}, + }; + std::vector> 
expected{ + {{0, 0, 0}, {4, 8, 5}}, + {{0, 8, 0}, {4, 9, 5}}, + {{4, 0, 0}, {7, 8, 5}}, + {{4, 8, 0}, {7, 9, 5}}, + }; + + std::vector> split; + grid_detail::dissect_box(input_box, cuts, split, 0); + + std::sort(split.begin(), split.end(), box_coordinate_order()); + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + CHECK(split == expected); +} + +template +void test_directional_merge(std::vector> unmerged, std::vector> merged) { + CAPTURE(MergeDim); + std::minstd_rand rng(42); + std::shuffle(unmerged.begin(), unmerged.end(), rng); + CAPTURE(unmerged); + auto test = unmerged; + test.erase(grid_detail::merge_connected_boxes_along_dim(test.begin(), test.end()), test.end()); + std::sort(test.begin(), test.end(), box_coordinate_order()); + std::sort(merged.begin(), merged.end(), box_coordinate_order()); + CHECK(test == merged); +} + +TEST_CASE("directional merge of non-overlapping boxes - 1d", "[grid]") { + const std::vector> unmerged{ + {{0}, {2}}, + {{2}, {4}}, + {{4}, {8}}, + {{10}, {12}}, + }; + const std::vector> merged{ + {{0}, {8}}, + {{10}, {12}}, + }; + test_directional_merge<0>(unmerged, merged); +} + +TEST_CASE("directional merge of overlapping boxes - 1d", "[grid]") { + const std::vector> unmerged{ + {{0}, {6}}, + {{2}, {4}}, + {{8}, {12}}, + {{10}, {16}}, + {{16}, {18}}, + }; + const std::vector> merged{ + {{0}, {6}}, + {{8}, {18}}, + }; + test_directional_merge<0>(unmerged, merged); +} + +TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { + const std::vector> unmerged{ + {{0, 0}, {2, 2}}, + {{0, 2}, {2, 4}}, + {{0, 4}, {2, 6}}, + {{2, 2}, {4, 4}}, + {{2, 4}, {4, 6}}, + {{2, 6}, {4, 8}}, + {{4, 4}, {6, 6}}, + {{4, 6}, {6, 8}}, + {{4, 8}, {6, 10}}, + }; + + const std::vector> merged_dim0{ + {{0, 0}, {2, 2}}, + {{0, 2}, {4, 4}}, + {{0, 4}, {6, 6}}, + {{2, 6}, {6, 8}}, + {{4, 8}, {6, 10}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 0}, {2, 6}}, + {{2, 2}, {4, 
8}}, + {{4, 4}, {6, 10}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + render_boxes(unmerged, "unmerged"); + render_boxes(merged_dim0, "merged-dim0"); + render_boxes(merged_dim1, "merged-dim1"); +} + +TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { + const std::vector> unmerged{ + {{0, 0}, {12, 3}}, + {{0, 1}, {12, 4}}, + {{0, 4}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {3, 12}}, + {{1, 0}, {4, 12}}, + {{4, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + + const std::vector> merged_dim0{ + {{0, 0}, {12, 3}}, + {{0, 1}, {12, 4}}, + {{0, 4}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 0}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {3, 12}}, + {{1, 0}, {4, 12}}, + {{4, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + render_boxes(unmerged, "unmerged"); + render_boxes(merged_dim0, "merged-dim0"); + render_boxes(merged_dim1, "merged-dim1"); +} + +TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { + const std::vector> unmerged{ + {{0, 0, 2}, {2, 2, 4}}, + {{0, 2, 0}, {2, 4, 2}}, + {{0, 2, 2}, {2, 4, 4}}, + {{2, 0, 0}, {4, 2, 2}}, + {{2, 0, 2}, {4, 2, 4}}, + {{2, 2, 0}, {4, 4, 2}}, + {{2, 2, 2}, {4, 4, 4}}, + }; + + const std::vector> merged_dim0{ + {{0, 0, 2}, {4, 2, 4}}, + {{0, 2, 0}, {4, 4, 2}}, + {{0, 2, 2}, {4, 4, 4}}, + {{2, 0, 0}, {4, 2, 2}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 2, 0}, {2, 4, 2}}, + {{0, 0, 2}, {2, 4, 4}}, + {{2, 0, 0}, {4, 4, 2}}, + {{2, 0, 2}, {4, 4, 4}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + const std::vector> merged_dim2{ + {{0, 0, 2}, {2, 2, 4}}, + {{0, 2, 0}, {2, 4, 4}}, + {{2, 0, 0}, {4, 2, 4}}, + {{2, 2, 0}, {4, 4, 4}}, + }; + test_directional_merge<2>(unmerged, merged_dim2); +} + +TEST_CASE("region normalization removes 
overlaps - 2d", "[grid]") { + const std::vector> overlapping{ + {{0, 0}, {4, 4}}, + {{2, 2}, {6, 6}}, + {{4, 8}, {5, 9}}, + }; + std::vector> normalized{ + {{0, 0}, {2, 4}}, + {{2, 0}, {4, 6}}, + {{4, 2}, {6, 6}}, + {{4, 8}, {5, 9}}, + }; + + const auto result = grid_detail::normalize(std::vector(overlapping)); + std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); + CHECK(result == normalized); + + render_boxes(overlapping, "input"); + render_boxes(result, "result"); + render_boxes(normalized, "normalized"); +} + +TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[grid]") { + const std::vector> input{ + {{0, 0}, {8, 2}}, + {{0, 2}, {2, 4}}, + {{6, 2}, {8, 4}}, + {{0, 4}, {8, 6}}, + }; + std::vector> normalized{ + {{0, 0}, {2, 6}}, + {{2, 0}, {6, 2}}, + {{2, 4}, {6, 6}}, + {{6, 0}, {8, 6}}, + }; + + const auto result = grid_detail::normalize(std::vector(input)); + std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); + CHECK(result == normalized); + + render_boxes(input, "input"); + render_boxes(result, "result"); + render_boxes(normalized, "normalized"); +} + +template +std::vector> create_random_boxes(const size_t grid_size, const size_t max_box_size, const size_t num_boxes, const uint32_t seed) { + std::minstd_rand rng(seed); + std::uniform_int_distribution offset_dist(0, grid_size - 1); + std::binomial_distribution range_dist(max_box_size - 1, 0.5); + std::vector> boxes; + while(boxes.size() < num_boxes) { + subrange sr; + bool inbounds = true; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = offset_dist(rng); + sr.range[d] = 1 + range_dist(rng); + inbounds &= sr.offset[d] + sr.range[d] <= grid_size; + } + if(inbounds) { boxes.emplace_back(sr); } + } + return boxes; +} + +TEST_CASE("normalizing randomized box sets - 2d", "[grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 200}, + })); + + 
const auto input_2d = create_random_boxes<2>(grid_size, max_box_size, num_boxes, 42); + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(input_2d)); }; + + const auto input_3d = grid_detail::boxes_cast<3>(input_2d); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + + const auto normalized_2d = grid_detail::normalize(std::vector(input_2d)); + const auto normalized_3d = grid_detail::normalize(std::vector(input_3d)); + CHECK(normalized_3d == grid_detail::boxes_cast<3>(normalized_2d)); + + render_boxes(input_2d, fmt::format("{}-input", label)); + render_boxes(normalized_2d, fmt::format("{}-normalized", label)); +} + +TEST_CASE("normalizing randomized box sets - 3d", "[grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 200}, + })); + + const auto input_3d = create_random_boxes<3>(grid_size, max_box_size, num_boxes, 42); + BENCHMARK(fmt::format("{} - native", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + test_utils::black_hole(grid_detail::normalize(std::vector(input_3d))); // to attach a profiler +} + +template +std::vector> create_box_tiling(const size_t n_per_side) { + const size_t length = 5; + size_t n_linear = 1; + for(int d = 0; d < Dims; ++d) { + n_linear *= n_per_side; + } + std::vector> boxes(n_linear); + for(size_t i = 0; i < n_linear; ++i) { + subrange sr; + auto dist_i = i; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = length * (dist_i % n_per_side); + sr.range[d] = length; + dist_i /= n_per_side; + } + boxes[i] = sr; + } + return boxes; +} + +TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[grid]", ((int Dims), Dims), 1, 2, 3) { + const auto [label, n] = GENERATE(values>({ + {"small", 4}, + {"medium", 50}, + {"large", 1000}, + })); + + const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); + + const 
auto boxes_nd = create_box_tiling(n_per_side); + const auto normalized_nd = grid_detail::normalize(std::vector(boxes_nd)); + CHECK(normalized_nd.size() == 1); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_nd)); }; + + if constexpr(Dims < 3) { + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + } + + if constexpr(Dims == 2) { + render_boxes(boxes_nd, fmt::format("{}-input", label)); + render_boxes(normalized_nd, fmt::format("{}-normalized", label)); + } +} + +template +GridBox to_legacy_box(const box& b) { + GridPoint min; + GridPoint max; + for(int d = 0; d < Dims; ++d) { + min[d] = b.get_min()[d]; + max[d] = b.get_max()[d]; + } + return GridBox(min, max); +} + +template +std::vector> to_legacy_boxes(const std::vector>& boxes) { + std::vector> legacy_boxes(boxes.size()); + std::transform(boxes.begin(), boxes.end(), legacy_boxes.begin(), to_legacy_box); + return legacy_boxes; +} + +template +GridRegion legacy_union(const std::vector>& boxes) { + auto it = boxes.begin(); + GridRegion r(*it++); + while(it != boxes.end()) { + r = GridRegion::merge(r, *it++); + } + return r; +} + +TEMPLATE_TEST_CASE_SIG("legacy: computing the union of a fully mergeable tiling of boxes", "[legacy-grid]", ((int Dims), Dims), 1, 2, 3) { + const auto [label, n] = GENERATE(values>({ + {"small", 4}, + {"medium", 50}, + {"large", 1000}, + })); + + const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); + + const auto boxes_nd = create_box_tiling(n_per_side); + const auto legacy_boxes_nd = to_legacy_boxes(boxes_nd); + + // TODO not entirely fair, we could do a tree-merge for comparison + BENCHMARK(fmt::format("{}, native", label)) { return legacy_union(legacy_boxes_nd); }; + + if constexpr(Dims < 3) { + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); + const auto legacy_boxes_3d = to_legacy_boxes(boxes_3d); + + 
BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return legacy_union(legacy_boxes_3d); }; + } +} + +// TODO: benchmark small box sets - we want low constant overhead for the common case + +TEST_CASE("region union - 2d", "[grid]") { + const region<2> ra{{ + {{0, 0}, {3, 3}}, + {{4, 0}, {7, 3}}, + {{0, 7}, {1, 9}}, + {{4, 7}, {6, 9}}, + }}; + const region<2> rb{{ + {{2, 3}, {5, 6}}, + {{6, 3}, {9, 6}}, + {{1, 7}, {2, 9}}, + {{4, 7}, {6, 9}}, + }}; + + std::vector> expected{ + {{0, 0}, {2, 3}}, + {{2, 0}, {3, 6}}, + {{3, 3}, {4, 6}}, + {{4, 0}, {5, 6}}, + {{5, 0}, {6, 3}}, + {{6, 0}, {7, 6}}, + {{7, 3}, {9, 6}}, + {{0, 7}, {2, 9}}, + {{4, 7}, {6, 9}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_union(ra, rb); + CHECK(result.get_boxes() == expected); + + render_boxes(ra.get_boxes(), "ra"); + render_boxes(rb.get_boxes(), "rb"); + render_boxes(expected, "expected"); + render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region intersection - 2d", "[grid]") { + const region<2> ra{{ + {{2, 2}, {6, 6}}, + {{6, 2}, {8, 4}}, + {{8, 0}, {9, 4}}, + {{0, 12}, {3, 14}}, + {{2, 9}, {4, 11}}, + }}; + const region<2> rb{{ + {{3, 4}, {7, 8}}, + {{7, 1}, {8, 4}}, + {{8, 2}, {9, 5}}, + {{2, 9}, {3, 14}}, + }}; + + std::vector> expected{ + {{3, 4}, {6, 6}}, + {{7, 2}, {9, 4}}, + {{2, 9}, {3, 11}}, + {{2, 12}, {3, 14}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_intersection(ra, rb); + CHECK(result.get_boxes() == expected); + + render_boxes(ra.get_boxes(), "ra"); + render_boxes(rb.get_boxes(), "rb"); + render_boxes(expected, "expected"); + render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region difference - 2d", "[grid]") { + const region<2> ra{{ + {{0, 0}, {6, 6}}, + {{1, 8}, {4, 11}}, + {{8, 2}, {10, 4}}, + }}; + const region<2> rb{{ + {{1, 1}, {3, 3}}, + {{2, 2}, {4, 4}}, + {{0, 9}, {2, 12}}, + {{4, 11}, {6, 13}}, + {{7, 1}, {11, 5}}, 
+ }}; + + std::vector> expected{ + {{0, 0}, {1, 6}}, + {{1, 0}, {3, 1}}, + {{3, 0}, {4, 2}}, + {{1, 3}, {2, 6}}, + {{2, 4}, {4, 6}}, + {{4, 0}, {6, 6}}, + {{1, 8}, {2, 9}}, + {{2, 8}, {4, 11}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_difference(ra, rb); + CHECK(result.get_boxes() == expected); + + render_boxes(ra.get_boxes(), "ra"); + render_boxes(rb.get_boxes(), "rb"); + render_boxes(expected, "expected"); + render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region normalization - 0d", "[grid]") { + std::vector> r; + auto n = r; + CHECK(grid_detail::normalize(std::vector(r)).empty()); + r.emplace_back(); + CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); + r.emplace_back(); + CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); +} + +TEST_CASE("region union - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_union(empty, empty).empty()); + CHECK(!region_union(empty, unit).empty()); + CHECK(!region_union(unit, empty).empty()); + CHECK(!region_union(unit, unit).empty()); +} + +TEST_CASE("region intersection - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_intersection(empty, empty).empty()); + CHECK(region_intersection(empty, unit).empty()); + CHECK(region_intersection(unit, empty).empty()); + CHECK(!region_intersection(unit, unit).empty()); +} + +TEST_CASE("region difference - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_difference(empty, empty).empty()); + CHECK(region_difference(empty, unit).empty()); + CHECK(!region_difference(unit, empty).empty()); + CHECK(region_difference(unit, unit).empty()); +} + +TEST_CASE("performing set operations between randomized regions - 2d", "[grid]") { + const auto 
[label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_2d{ + region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; + const std::vector inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; + + render_boxes(inputs_2d[0].get_boxes(), fmt::format("{}-input-a", label)); + render_boxes(inputs_2d[1].get_boxes(), fmt::format("{}-input-b", label)); + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + const auto union_2d = region_union(inputs_2d[0], inputs_2d[1]); + const auto union_3d = region_union(inputs_3d[0], inputs_3d[1]); + const auto intersection_2d = region_intersection(inputs_2d[0], inputs_2d[1]); + const auto intersection_3d = region_intersection(inputs_3d[0], inputs_3d[1]); + const auto difference_2d = region_difference(inputs_2d[0], inputs_2d[1]); + const auto difference_3d = region_difference(inputs_3d[0], inputs_3d[1]); + + CHECK(union_3d == region_cast<3>(union_2d)); + CHECK(intersection_3d == region_cast<3>(intersection_2d)); + CHECK(difference_3d == region_cast<3>(difference_2d)); + + render_boxes(union_2d.get_boxes(), fmt::format("union-{}", label)); + 
render_boxes(intersection_2d.get_boxes(), fmt::format("intersection-{}", label)); + render_boxes(difference_2d.get_boxes(), fmt::format("difference-{}", label)); +} + +TEST_CASE("performing set operations between randomized regions - 3d", "[grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_3d{ + region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37))}; + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + // to attach a profiler + test_utils::black_hole(region_union(inputs_3d[0], inputs_3d[1])); + test_utils::black_hole(region_intersection(inputs_3d[0], inputs_3d[1])); + test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); +} + +TEST_CASE("legacy: performing set operations between randomized regions - 2d", "[legacy-grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_2d{ + region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; + const std::vector inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; + + const std::vector legacy_inputs_2d{legacy_union(to_legacy_boxes(inputs_2d[0].get_boxes())), legacy_union(to_legacy_boxes(inputs_2d[1].get_boxes()))}; + const std::vector legacy_inputs_3d{legacy_union(to_legacy_boxes(inputs_3d[0].get_boxes())), 
legacy_union(to_legacy_boxes(inputs_3d[1].get_boxes()))}; + + BENCHMARK(fmt::format("union, {}, native", label)) { return GridRegion<2>::merge(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; + BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return GridRegion<3>::merge(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return GridRegion<2>::intersect(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; + BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return GridRegion<3>::intersect(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return GridRegion<2>::difference(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; + BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return GridRegion<3>::difference(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; +} + +TEST_CASE("legacy: performing set operations between randomized regions - 3d", "[legacy-grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + //{"medium", 50, 1, 50}, + //{"large", 200, 20, 100}, + })); + + const std::vector inputs_3d{legacy_union(to_legacy_boxes(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13))), + legacy_union(to_legacy_boxes(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37)))}; + + BENCHMARK(fmt::format("union, {}, native", label)) { return GridRegion<3>::merge(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return GridRegion<3>::intersect(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return GridRegion<3>::difference(inputs_3d[0], inputs_3d[1]); }; +} + +std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { + std::vector> boxes; + for(size_t i = 0; i < num_boxes_per_side; ++i) { + boxes.emplace_back(id<2>(i, i), id<2>(i + 1, num_boxes_per_side)); + 
boxes.emplace_back(id<2>(i + 1, i), id<2>(num_boxes_per_side, i + 1)); + } + return boxes; +} + +TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[grid]") { + const auto [label, n] = GENERATE(values>({ + {"small", 10}, + {"large", 200}, + })); + + const auto boxes_2d = create_interlocking_boxes(n); + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_2d)); }; + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + + render_boxes(boxes_2d, fmt::format("{}-input", label)); +} + +TEST_CASE("legacy: normalizing a fully mergeable, complex tiling of boxes - 2d", "[legacy-grid]") { + const auto [label, n] = GENERATE(values>({ + {"small", 10}, + {"large", 200}, + })); + + const auto boxes_2d = create_interlocking_boxes(n); + const auto legacy_boxes_2d = to_legacy_boxes(boxes_2d); + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); + const auto legacy_boxes_3d = to_legacy_boxes(boxes_3d); + + BENCHMARK(fmt::format("{}, native", label)) { return legacy_union(legacy_boxes_2d); }; + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return legacy_union(legacy_boxes_3d); }; +} \ No newline at end of file diff --git a/test/test_utils.h b/test/test_utils.h index 789057fa7..906e979ea 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -1,5 +1,7 @@ #pragma once +#include "catch2/benchmark/catch_clock.hpp" +#include "catch2/benchmark/catch_optimizer.hpp" #include #include #include @@ -360,6 +362,11 @@ namespace test_utils { ~task_test_context() { maybe_print_task_graph(trec); } }; + template + void black_hole(T&& v) { + Catch::Benchmark::keep_memory(&v); + } + } // namespace test_utils } // namespace celerity @@ -373,7 +380,7 @@ struct StringMaker> { case 1: return fmt::format("{{{}}}", value[0]); case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); case 3: return 
fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return {}; + default: return "{}"; } } }; @@ -385,7 +392,7 @@ struct StringMaker> { case 1: return fmt::format("{{{}}}", value[0]); case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); case 3: return fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return {}; + default: return "{}"; } } }; From 2ff28d59b048bb1381cd47f980ff59a0f472f812 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Wed, 2 Aug 2023 20:32:26 +0200 Subject: [PATCH 04/17] Port runtime to new grid implementation --- CMakeLists.txt | 1 - include/buffer_transfer_manager.h | 16 +- include/command.h | 8 +- include/distributed_graph_generator.h | 4 +- include/recorders.h | 4 +- include/region_map.h | 213 ++++++++++---------- include/task.h | 4 +- src/buffer_manager.cc | 88 ++++---- src/buffer_transfer_manager.cc | 2 +- src/distributed_graph_generator.cc | 64 +++--- src/print_graph.cc | 13 +- src/print_utils.cc | 24 --- src/recorders.cc | 2 +- src/task.cc | 12 +- src/task_manager.cc | 19 +- src/worker_job.cc | 6 +- test/accessor_tests.cc | 30 +-- test/backend_tests.cc | 10 +- test/graph_gen_granularity_tests.cc | 6 +- test/graph_generation_tests.cc | 2 +- test/integration/backend.cc | 52 +++-- test/print_graph_tests.cc | 48 ++--- test/region_map_tests.cc | 276 +++++++++++++------------- test/runtime_tests.cc | 12 +- test/system/distr_tests.cc | 26 +-- test/task_graph_tests.cc | 2 +- test/test_utils.h | 79 ++++++-- 27 files changed, 528 insertions(+), 495 deletions(-) delete mode 100644 src/print_utils.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index ebf9da7df..45dc4525d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,7 +193,6 @@ set(SOURCES src/graph_serializer.cc src/grid.cc src/print_graph.cc - src/print_utils.cc src/recorders.cc src/runtime.cc src/scheduler.cc diff --git a/include/buffer_transfer_manager.h b/include/buffer_transfer_manager.h index 474c7440b..b5dc309b5 100644 --- 
a/include/buffer_transfer_manager.h +++ b/include/buffer_transfer_manager.h @@ -63,23 +63,23 @@ namespace detail { struct incoming_transfer_handle : transfer_handle { incoming_transfer_handle(const size_t num_nodes) : m_num_nodes(num_nodes) {} - void set_expected_region(GridRegion<3> region) { m_expected_region = std::move(region); } + void set_expected_region(region<3> region) { m_expected_region = std::move(region); } void add_transfer(std::unique_ptr&& t) { assert(!complete); assert(t->frame->rid == 0 || m_is_reduction || m_transfers.empty()); // Either all or none m_is_reduction = t->frame->rid != 0; - const auto box = subrange_to_grid_box(t->frame->sr); - assert(GridRegion<3>::intersect(m_received_region, box).empty() || m_is_reduction); - assert(!m_expected_region.has_value() || GridRegion<3>::difference(box, *m_expected_region).empty()); - m_received_region = GridRegion<3>::merge(m_received_region, box); + const auto box = detail::box(t->frame->sr); + assert(region_intersection(m_received_region, box).empty() || m_is_reduction); + assert(!m_expected_region.has_value() || region_difference(box, *m_expected_region).empty()); + m_received_region = region_union(m_received_region, box); m_transfers.push_back(std::move(t)); } bool received_full_region() const { if(!m_expected_region.has_value()) return false; if(m_is_reduction) { - assert(m_expected_region->area() == 1); + assert(m_expected_region->get_area() == 1); // For reductions we're waiting to receive one message per peer return m_transfers.size() == m_num_nodes - 1; } @@ -99,8 +99,8 @@ namespace detail { size_t m_num_nodes; // Number of nodes in the system, required for reductions bool m_is_reduction = false; std::vector> m_transfers; - std::optional> m_expected_region; // This will only be set once the await push job has started - GridRegion<3> m_received_region; + std::optional> m_expected_region; // This will only be set once the await push job has started + region<3> m_received_region; }; struct 
transfer_out { diff --git a/include/command.h b/include/command.h index 312091a49..ea1ebd10e 100644 --- a/include/command.h +++ b/include/command.h @@ -70,14 +70,14 @@ namespace detail { class await_push_command final : public abstract_command { friend class command_graph; - await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, GridRegion<3> region) + await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, region<3> region) : abstract_command(cid), m_bid(bid), m_rid(rid), m_trid(trid), m_region(std::move(region)) {} public: buffer_id get_bid() const { return m_bid; } reduction_id get_reduction_id() const { return m_rid; } transfer_id get_transfer_id() const { return m_trid; } - GridRegion<3> get_region() const { return m_region; } + region<3> get_region() const { return m_region; } private: buffer_id m_bid; @@ -85,7 +85,7 @@ namespace detail { // but it allows us to sanity check that they match as well as include the ID during graph printing. 
reduction_id m_rid; transfer_id m_trid; - GridRegion<3> m_region; + region<3> m_region; }; class reduction_command final : public abstract_command { @@ -184,7 +184,7 @@ namespace detail { buffer_id bid; reduction_id rid; transfer_id trid; - GridRegion<3> region; + region<3> region; }; struct reduction_data { diff --git a/include/distributed_graph_generator.h b/include/distributed_graph_generator.h index f28d54cf8..bbe74641f 100644 --- a/include/distributed_graph_generator.h +++ b/include/distributed_graph_generator.h @@ -100,7 +100,7 @@ class distributed_graph_generator { void generate_distributed_commands(const task& tsk); void generate_anti_dependencies( - task_id tid, buffer_id bid, const region_map& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd); + task_id tid, buffer_id bid, const region_map& last_writers_map, const region<3>& write_req, abstract_command* write_cmd); void process_task_side_effect_requirements(const task& tsk); @@ -117,7 +117,7 @@ class distributed_graph_generator { void prune_commands_before(const command_id epoch); private: - using buffer_read_map = std::unordered_map>; + using buffer_read_map = std::unordered_map>; using side_effect_map = std::unordered_map; size_t m_num_nodes; diff --git a/include/recorders.h b/include/recorders.h index ed0771790..caf45b8c6 100644 --- a/include/recorders.h +++ b/include/recorders.h @@ -14,7 +14,7 @@ struct access_record { const buffer_id bid; const std::string buffer_name; const access_mode mode; - const GridRegion<3> req; + const region<3> req; }; using access_list = std::vector; @@ -80,7 +80,7 @@ struct command_record { const std::optional buffer_id; const std::string buffer_name; const std::optional target; - const std::optional> await_region; + const std::optional> await_region; const std::optional> push_range; const std::optional transfer_id; const std::optional task_id; diff --git a/include/region_map.h b/include/region_map.h index 5ee1c798f..2ee9facaa 100644 --- 
a/include/region_map.h +++ b/include/region_map.h @@ -37,7 +37,7 @@ namespace region_map_detail { constexpr size_t min_children = 2; template - bool is_lo_inside(const GridBox& a, const GridBox& b) { + bool is_lo_inside(const box& a, const box& b) { static_assert(D < Dims); const auto a_min = a.get_min(); if(a_min[D] <= b.get_min()[D]) return false; @@ -46,7 +46,7 @@ namespace region_map_detail { } template - bool is_hi_inside(const GridBox& a, const GridBox& b) { + bool is_hi_inside(const box& a, const box& b) { static_assert(D < Dims); const auto a_max = a.get_max(); if(a_max[D] <= b.get_min()[D]) return false; @@ -54,8 +54,8 @@ namespace region_map_detail { return true; } - template - GridBox compute_bounding_box(const GridBox& a, const GridBox& b) { + template + box compute_bounding_box(const box& a, const box& b) { const auto min_a = a.get_min(); const auto min_b = b.get_min(); const auto max_a = a.get_max(); @@ -69,13 +69,13 @@ namespace region_map_detail { return {new_min, new_max}; } - template - bool do_overlap(const GridBox& a, const GridBox& b) { - return a.intersectsWith(b); + template + bool do_overlap(const box& a, const box& b) { + return !box_intersection(a, b).empty(); } - template - bool is_inside(const GridBox& box, const GridPoint& point) { + template + bool is_inside(const box& box, const id& point) { auto box_min = box.get_min(); auto box_max = box.get_max(); bool inside = true; @@ -86,17 +86,6 @@ namespace region_map_detail { return inside; } - template - GridBox box_cast(const GridBox& other) { - GridPoint min; - GridPoint max; - for(size_t o = 0; o < DimsOut; ++o) { - min[o] = o < DimsIn ? other.get_min()[o] : 0; - max[o] = o < DimsIn ? 
other.get_max()[o] : 1; - } - return GridBox(min, max); - } - /** * Check that the region map's tree structure is in a good state: * - Root bounding box is equal to extent @@ -111,7 +100,7 @@ namespace region_map_detail { rm.m_root->sanity_check_bounding_boxes(); size_t max_depth = 0; - std::queue, const typename RegionMap::types::inner_node_type*>> node_queue; + std::queue, const typename RegionMap::types::inner_node_type*>> node_queue; node_queue.push(std::make_pair(rm.m_root->get_bounding_box(), rm.m_root.get())); while(!node_queue.empty()) { @@ -138,13 +127,13 @@ namespace region_map_detail { #endif } - template + template class inner_node; /** * Convenience types shared by inner_node and region_map_impl. */ - template + template class region_map_types { public: static_assert(Dims <= 3); @@ -152,30 +141,30 @@ namespace region_map_detail { using inner_node_type = inner_node; using unique_inner_node_ptr = std::unique_ptr; using inner_node_child_type = std::variant; - using entry = std::pair, ValueType>; + using entry = std::pair, ValueType>; struct insert_node_action { - GridBox box; + box box; ValueType value; bool processed_locally = false; }; struct erase_node_action { - GridBox box; + box box; bool processed_locally = false; }; using update_action = std::variant; - using orphan = std::pair, inner_node_child_type>; + using orphan = std::pair, inner_node_child_type>; struct insert_result { unique_inner_node_ptr spilled_node; // This should always be the same as spilled_node->get_bounding_box (TODO: assert?) - GridBox spilled_box; + box spilled_box; }; }; - template + template class inner_node { friend struct celerity::detail::region_map_testspy; @@ -232,7 +221,7 @@ namespace region_map_detail { * @param actions The list of erase and insert actions required to create a hole for the new entry. * @returns True if a localized update operation was performed that may require a bounding box recomputation. 
*/ - bool update_box(const GridBox& box, const ValueType& value, std::vector& actions) { + bool update_box(const box& box, const ValueType& value, std::vector& actions) { if(!m_contains_leaves) { bool any_child_did_local_update = false; for(size_t i = 0; i < m_child_boxes.size(); ++i) { @@ -269,21 +258,31 @@ namespace region_map_detail { // Partial overlap. Check in each dimension which sides of the box intersect with the current box, creating new boxes along the way. // TODO PERF: A split may not even be necessary, if the value remains the same. Is this something worth optimizing for? - GridBox remainder = child_box; + detail::box remainder = child_box; const auto& child_value = get_child_value(i); const auto split_along = [&](const auto dim) { if(is_lo_inside(box, child_box)) { - auto new_box = remainder; - new_box.get_max()[dim.value] = box.get_min()[dim.value]; - remainder.get_min()[dim.value] = box.get_min()[dim.value]; + auto new_box_max = remainder.get_max(); + new_box_max[dim.value] = box.get_min()[dim.value]; + const auto new_box = detail::box(remainder.get_min(), new_box_max); + + auto new_remainder_min = remainder.get_min(); + new_remainder_min[dim.value] = box.get_min()[dim.value]; + remainder = detail::box(new_remainder_min, remainder.get_max()); + actions.push_back(typename types::insert_node_action{new_box, child_value}); } if(is_hi_inside(box, child_box)) { - auto new_box = remainder; - new_box.get_min()[dim.value] = box.get_max()[dim.value]; - remainder.get_max()[dim.value] = box.get_max()[dim.value]; + auto new_box_min = remainder.get_min(); + new_box_min[dim.value] = box.get_max()[dim.value]; + const auto new_box = detail::box(new_box_min, remainder.get_max()); + + auto new_remainder_max = remainder.get_max(); + new_remainder_max[dim.value] = box.get_max()[dim.value]; + remainder = detail::box(remainder.get_min(), new_remainder_max); + actions.push_back(typename types::insert_node_action{new_box, child_value}); } }; @@ -362,14 +361,14 @@ 
namespace region_map_detail { * * TODO: Structurally very similar to insert_subtree - can we DRY up? */ - std::optional insert(const GridBox& box, const ValueType& value) { + std::optional insert(const box& box, const ValueType& value) { if(!m_contains_leaves) { // Value belongs deeper into the tree. Find child that best fits it. // TODO PERF: Resolve ties in area increase according to [Guttman 1984] size_t best_i = std::numeric_limits::max(); size_t smallest_area_delta = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { - const auto area_delta = compute_bounding_box(m_child_boxes[i], box).area() - m_child_boxes[i].area(); + const auto area_delta = compute_bounding_box(m_child_boxes[i], box).get_area() - m_child_boxes[i].get_area(); if(area_delta < smallest_area_delta) { smallest_area_delta = area_delta; best_i = i; @@ -433,8 +432,8 @@ namespace region_map_detail { // Greedily assign all values to groups, O(N^2) auto bbox1 = m_child_boxes[seed1]; auto bbox2 = m_child_boxes[seed2]; - auto area1 = bbox1.area(); - auto area2 = bbox2.area(); + auto area1 = bbox1.get_area(); + auto area2 = bbox2.get_area(); std::vector assigned(m_children.size(), false); assigned[seed1] = true; assigned[seed2] = true; @@ -442,7 +441,7 @@ namespace region_map_detail { while(num_assigned < m_children.size()) { size_t smallest_area_delta = std::numeric_limits::max(); size_t smallest_i = std::numeric_limits::max(); - GridBox smallest_bbox; + detail::box smallest_bbox; size_t smallest_area = 0; size_t target_node = 0; @@ -451,8 +450,8 @@ namespace region_map_detail { const auto new_bbox1 = compute_bounding_box(m_child_boxes[i], bbox1); const auto new_bbox2 = compute_bounding_box(m_child_boxes[i], bbox2); - const auto new_area1 = new_bbox1.area(); - const auto new_area2 = new_bbox2.area(); + const auto new_area1 = new_bbox1.get_area(); + const auto new_area2 = new_bbox2.get_area(); const auto ad1 = (new_area1 - area1); const auto ad2 = (new_area2 - area2); @@ 
-514,7 +513,7 @@ namespace region_map_detail { * * TODO: Structurally very similar to insert - can we DRY up? */ - std::optional insert_subtree(const GridBox& box, std::unique_ptr>&& subtree) { + std::optional insert_subtree(const box& box, std::unique_ptr>&& subtree) { assert(!m_contains_leaves); assert(subtree->m_depth > m_depth); @@ -525,7 +524,7 @@ namespace region_map_detail { size_t best_i = std::numeric_limits::max(); size_t smallest_area_delta = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { - const auto area_delta = compute_bounding_box(m_child_boxes[i], box).area() - m_child_boxes[i].area(); + const auto area_delta = compute_bounding_box(m_child_boxes[i], box).get_area() - m_child_boxes[i].get_area(); if(area_delta < smallest_area_delta) { smallest_area_delta = area_delta; best_i = i; @@ -576,7 +575,7 @@ namespace region_map_detail { const auto new_bbox2 = compute_bounding_box(bbox2, m_child_boxes[i]); // Assign value to node that results in smaller area increase. - if((new_bbox1.area() - bbox1.area()) < (new_bbox2.area() - bbox2.area())) { + if((new_bbox1.get_area() - bbox1.get_area()) < (new_bbox2.get_area() - bbox2.get_area())) { node1->insert_child_node(m_child_boxes[i], std::move(std::get(m_children[i]))); bbox1 = new_bbox1; } else { @@ -607,7 +606,7 @@ namespace region_map_detail { * @param orphans A list of entries or subtrees that were orphaned due to dissolving a node. * @returns True if the box was erased in this subtree. */ - bool erase(const GridBox& box, std::vector& orphans) { + bool erase(const box& box, std::vector& orphans) { bool did_erase = false; if(!m_contains_leaves) { @@ -649,7 +648,7 @@ namespace region_map_detail { /** * Recursively finds all entries that intersect with box. 
*/ - void query(const GridBox& box, std::vector& intersecting) const { + void query(const box& box, std::vector& intersecting) const { if(!m_contains_leaves) { for(size_t i = 0; i < m_children.size(); ++i) { if(do_overlap(m_child_boxes[i], box)) { get_child_node(i).query(box, intersecting); } @@ -664,7 +663,7 @@ namespace region_map_detail { /** * Returns the entry containing a given point, if such an entry exists. */ - std::optional point_query(const GridPoint& point) const { + std::optional point_query(const id& point) const { for(size_t i = 0; i < m_children.size(); ++i) { if(is_inside(m_child_boxes[i], point)) { if(!m_contains_leaves) { @@ -688,16 +687,16 @@ namespace region_map_detail { } // NOTE: Not O(1)! - GridBox get_bounding_box() const { + box get_bounding_box() const { assert(!m_child_boxes.empty()); - GridBox bbox = m_child_boxes[0]; + box bbox = m_child_boxes[0]; for(size_t i = 1; i < m_child_boxes.size(); ++i) { bbox = compute_bounding_box(bbox, m_child_boxes[i]); } return bbox; } - void insert_child_node(const GridBox& box, std::unique_ptr&& node) { + void insert_child_node(const box& box, std::unique_ptr&& node) { assert(m_children.size() < max_children + 1); // During splits we temporarily go one above the max m_child_boxes.push_back(box); m_children.emplace_back(std::move(node)); @@ -746,7 +745,7 @@ namespace region_map_detail { bool m_contains_leaves; // TODO PERF: Consider storing these in small vectors - std::vector> m_child_boxes; + std::vector> m_child_boxes; std::vector m_children; inner_node& get_child_node(size_t index) { return *std::get(m_children[index]); } @@ -755,12 +754,12 @@ namespace region_map_detail { ValueType& get_child_value(size_t index) { return std::get(m_children[index]); } const ValueType& get_child_value(size_t index) const { return std::get(m_children[index]); } - void insert_child_value(const GridBox& box, const ValueType& value) { + void insert_child_value(const box& box, const ValueType& value) { 
assert(m_children.size() < max_children + 1); // During splits we temporarily go one above the max #if !defined(NDEBUG) for(auto& b : m_child_boxes) { // New box must not overlap with any other - assert(GridRegion::intersect(b, box).empty()); + assert(box_intersection(b, box).empty()); } #endif m_child_boxes.push_back(box); @@ -779,7 +778,7 @@ namespace region_map_detail { size_t worst_j = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { for(size_t j = i + 1; j < m_child_boxes.size(); ++j) { - const auto area = compute_bounding_box(m_child_boxes[i], m_child_boxes[j]).area(); + const auto area = compute_bounding_box(m_child_boxes[i], m_child_boxes[j]).get_area(); if(area > worst_area) { worst_area = area; worst_i = i; @@ -794,14 +793,14 @@ namespace region_map_detail { bool is_underfull() const { return m_children.size() < min_children; } - GridBox sanity_check_bounding_boxes() const { + box sanity_check_bounding_boxes() const { #if !defined(NDEBUG) - // After an erase this node might not have any children. Return empty box in that case. - if(m_child_boxes.empty()) { return box_cast(GridBox<3>({0, 0, 0}, {0, 0, 0})); } + // After an erase this node might not have any children. Return empty box in that case. TODO this breaks for Dims == 0 (where area is always 1)! + if(m_child_boxes.empty()) { return box_cast(box<3>({0, 0, 0}, {0, 0, 0})); } - GridBox result = m_child_boxes[0]; + box result = m_child_boxes[0]; for(size_t i = 1; i < m_child_boxes.size(); ++i) { - const GridBox child_box = m_contains_leaves ? m_child_boxes[i] : get_child_node(i).sanity_check_bounding_boxes(); + const box child_box = m_contains_leaves ? 
m_child_boxes[i] : get_child_node(i).sanity_check_bounding_boxes(); assert(m_child_boxes[i] == child_box); result = compute_bounding_box(result, child_box); } @@ -811,24 +810,17 @@ namespace region_map_detail { } }; - inline void assert_dimensionality(const GridBox<3>& box, const int dims) { + inline void assert_dimensionality(const box<3>& box, const int dims) { #if !defined(NDEBUG) - const auto& min = box.get_min(); - const auto& max = box.get_max(); - if(dims < 3) { - assert(min[2] == 0); - assert(max[2] == 1); - } - if(dims == 1) { - assert(min[1] == 0); - assert(max[1] == 1); - } + assert(box.get_min_dimensions() <= dims); #endif } - inline void assert_dimensionality(const GridRegion<3>& reg, const int dims) { + inline void assert_dimensionality(const region<3>& reg, const int dims) { #if !defined(NDEBUG) - reg.scanByBoxes([&](const GridBox<3>& box) { assert_dimensionality(box, dims); }); + for(const auto& box : reg.get_boxes()) { + assert_dimensionality(box, dims); + } #endif } @@ -844,7 +836,7 @@ namespace region_map_detail { * TODO PERF: Try to minimize the number of value copies we do during intermediate steps (e.g. when merging) * TODO PERF: Look into bulk-loading algorithms for updating multiple boxes at once */ - template + template class region_map_impl { friend struct celerity::detail::region_map_testspy; using types = region_map_types; @@ -854,8 +846,7 @@ namespace region_map_detail { static constexpr size_t dimensions = Dims; region_map_impl(const range& extent, ValueType default_value = ValueType{}) - : m_extent(subrange_to_grid_box(subrange{id_cast(id<3>{0, 0, 0}), extent})), - m_root(std::make_unique(true, 0)) { + : m_extent(subrange({}, extent)), m_root(std::make_unique(true, 0)) { m_root->insert(this->m_extent, default_value); } @@ -879,10 +870,10 @@ namespace region_map_detail { * 3) Attempt to merge the box as well as any other newly created boxes * with their surrounding entries. 
*/ - void update_box(const GridBox& box, const ValueType& value) { + void update_box(const box& box, const ValueType& value) { assert(m_root != nullptr && "Moved from?"); - const auto clamped_box = GridBox::intersect(m_extent, box); + const auto clamped_box = box_intersection(m_extent, box); // This can happen e.g. for empty buffers, or if the box is // completely outside the region map's extent for some reason. @@ -904,18 +895,18 @@ namespace region_map_detail { #if !defined(NDEBUG) // Sanity check: Erased and inserted boxes must cover the same space - GridRegion erased; - GridRegion inserted; + region erased; + region inserted; for(const auto& a : m_update_actions) { utils::match( a, [&](const typename types::erase_node_action& erase_action) { - assert(GridRegion::intersect(erased, erase_action.box).empty()); - erased = GridRegion::merge(erased, erase_action.box); + assert(region_intersection(erased, erase_action.box).empty()); + erased = region_union(erased, erase_action.box); }, [&](const typename types::insert_node_action& insert_action) { - assert(GridRegion::intersect(inserted, insert_action.box).empty()); - inserted = GridRegion::merge(inserted, insert_action.box); + assert(region_intersection(inserted, insert_action.box).empty()); + inserted = region_union(inserted, insert_action.box); }); } assert(erased == inserted); @@ -968,7 +959,7 @@ namespace region_map_detail { * * TODO PERF: In most cases we are unlikely to store the returned values, and the copy is unnecessary. Return const reference instead? 
*/ - std::vector get_region_values(const GridBox& request) const { + std::vector get_region_values(const box& request) const { assert(m_root != nullptr && "Moved from?"); m_query_results_raw.clear(); @@ -993,7 +984,7 @@ namespace region_map_detail { clamped_min[d] = std::max(v_min[d], r_min[d]); clamped_max[d] = std::min(v_max[d], r_max[d]); } - m_query_results_clamped.push_back(std::make_pair(GridBox{clamped_min, clamped_max}, v)); + m_query_results_clamped.push_back(std::make_pair(box{clamped_min, clamped_max}, v)); } #else std::swap(m_query_results_raw, m_query_results_clamped); @@ -1043,7 +1034,7 @@ namespace region_map_detail { return m_root->format_to(out, 0); } - range get_extent() const { return grid_box_to_subrange(m_extent).range; } + range get_extent() const { return m_extent.get_range(); } private: template @@ -1051,7 +1042,7 @@ namespace region_map_detail { // The extent specifies the boundaries for the region map to which all entries are clamped, // and which initially contains the default value. Currently always starts at [0,0,0]. - GridBox m_extent; + box m_extent; std::unique_ptr m_root; @@ -1069,7 +1060,7 @@ namespace region_map_detail { * Inserts a new entry into the tree. * Precondition: The insert location must be empty. */ - void insert(const GridBox& box, const ValueType& value) { + void insert(const box& box, const ValueType& value) { auto ret = m_root->insert(box, value); if(ret.has_value()) { reroot(std::move(*ret)); } } @@ -1077,7 +1068,7 @@ namespace region_map_detail { /** * Inserts a subtree (either from a dissolved parent or after a split) into the tree. */ - void insert_subtree(const GridBox& box, typename types::unique_inner_node_ptr&& subtree) { + void insert_subtree(const box& box, typename types::unique_inner_node_ptr&& subtree) { auto ret = m_root->insert_subtree(box, std::move(subtree)); if(ret.has_value()) { reroot(std::move(*ret)); } } @@ -1099,7 +1090,7 @@ namespace region_map_detail { * Erases a box from the tree. 
If the parent box becomes underfull it is dissolved and its children * are reinserted. */ - void erase(const GridBox& box) { + void erase(const box& box) { m_erase_orphans.clear(); [[maybe_unused]] const auto did_erase = m_root->erase(box, m_erase_orphans); assert(did_erase); @@ -1122,7 +1113,7 @@ namespace region_map_detail { * Calculates whether two boxes can be merged. In order to be mergeable, the two boxes * have to touch in one dimension and match exactly in all remaining dimensions. */ - bool can_merge(const GridBox& box_a, const GridBox& box_b) const { + bool can_merge(const box& box_a, const box& box_b) const { bool adjacent = false; for(size_t d = 0; d < Dims; ++d) { if(box_a.get_min()[d] != box_b.get_min()[d] || box_a.get_max()[d] != box_b.get_max()[d]) { @@ -1146,10 +1137,10 @@ namespace region_map_detail { void try_merge(std::vector&& merge_candidates) { #if !defined(NDEBUG) // Sanity check: Merge candidates do not overlap - GridRegion candidate_union; + region candidate_union; for(auto& [box, value] : merge_candidates) { - assert(GridRegion::intersect(candidate_union, box).empty()); - candidate_union = GridRegion::merge(candidate_union, box); + assert(region_intersection(candidate_union, box).empty()); + candidate_union = region_union(candidate_union, box); } #endif @@ -1170,7 +1161,7 @@ namespace region_map_detail { for(size_t d = 0; d < Dims; ++d) { const auto min = box.get_min(); const auto max = box.get_max(); - std::optional> other_box; + std::optional> other_box; if(min[d] > 0) { auto probe = min; probe[d] -= 1; @@ -1236,9 +1227,9 @@ namespace region_map_detail { public: region_map_impl(const range<0>& /* extent */, ValueType default_value) : m_value(default_value) {} - void update_box(const GridBox<1>& /* box */, const ValueType& value) { m_value = value; } + void update_box(const box<1>& /* box */, const ValueType& value) { m_value = value; } - std::vector, ValueType>> get_region_values(const GridBox<1>& /* request */) const { return 
{{GridBox<1>{0, 1}, m_value}}; } + std::vector, ValueType>> get_region_values(const box<1>& /* request */) const { return {{box<1>{0, 1}, m_value}}; } template void apply_to_values(const Functor& f) { @@ -1267,7 +1258,7 @@ class region_map { */ region_map(range<3> extent, int dims, ValueType default_value = ValueType{}) : m_dims(dims) { using namespace region_map_detail; - assert_dimensionality(subrange_to_grid_box(subrange<3>{id<3>{}, extent}), dims); + assert_dimensionality(box<3>(subrange<3>{id<3>{}, extent}), dims); switch(m_dims) { case 0: m_region_map.template emplace>(range_cast<0>(extent), default_value); break; case 1: m_region_map.template emplace>(range_cast<1>(extent), default_value); break; @@ -1280,15 +1271,17 @@ class region_map { /** * Sets a new value for the provided region within the region map. */ - void update_region(const GridRegion<3>& region, const ValueType& value) { + void update_region(const region<3>& region, const ValueType& value) { region_map_detail::assert_dimensionality(region, m_dims); - region.scanByBoxes([&](const GridBox<3>& box) { update_box(box, value); }); + for(const auto& box : region.get_boxes()) { + update_box(box, value); + } } /** * Sets a new value for the provided box within the region map. */ - void update_box(const GridBox<3>& box, const ValueType& value) { + void update_box(const box<3>& box, const ValueType& value) { using namespace region_map_detail; switch(m_dims) { case 0: get_map<0>().update_box(box_cast<1>(box), value); break; @@ -1304,13 +1297,13 @@ class region_map { * * @returns A list of boxes clamped to the request region, and their associated values. 
*/ - std::vector, ValueType>> get_region_values(const GridRegion<3>& request) const { + std::vector, ValueType>> get_region_values(const region<3>& request) const { region_map_detail::assert_dimensionality(request, m_dims); - std::vector, ValueType>> results; - request.scanByBoxes([&](const GridBox<3>& box) { + std::vector, ValueType>> results; + for(const auto& box : request.get_boxes()) { const auto r = get_region_values(box); results.insert(results.begin(), r.cbegin(), r.cend()); - }); + } return results; } @@ -1319,9 +1312,9 @@ class region_map { * * @returns A list of boxes clamped to the request box, and their associated values. */ - std::vector, ValueType>> get_region_values(const GridBox<3>& request) const { + std::vector, ValueType>> get_region_values(const box<3>& request) const { using namespace region_map_detail; - std::vector, ValueType>> results; + std::vector, ValueType>> results; switch(m_dims) { // TODO: AllScale box doesn't support 0 dimensions, fall back to 1 case 0: { diff --git a/include/task.h b/include/task.h index 1b4aa9fbe..e69b1d719 100644 --- a/include/task.h +++ b/include/task.h @@ -104,10 +104,10 @@ namespace detail { * * @returns The region obtained by merging the results of all range-mappers for this buffer and mode */ - GridRegion<3> get_mode_requirements( + region<3> get_mode_requirements( const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; - GridBox<3> get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; + box<3> get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; private: std::vector>> m_accesses; diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index 00c89855e..e61b1c183 100644 --- a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -33,7 +33,7 @@ namespace detail { void 
buffer_manager::get_buffer_data(buffer_id bid, const subrange<3>& sr, void* out_linearized) { std::unique_lock lock(m_mutex); assert(m_buffers.count(bid) == 1 && (m_buffers.at(bid).device_buf.is_allocated() || m_buffers.at(bid).host_buf.is_allocated())); - auto data_locations = m_newest_data_location.at(bid).get_region_values(subrange_to_grid_box(sr)); + auto data_locations = m_newest_data_location.at(bid).get_region_values(region(sr)); // Slow path: We need to obtain current data from both host and device. if(data_locations.size() > 1) { @@ -50,13 +50,13 @@ namespace detail { } existing_buf = make_buffer_subrange_coherent(bid, access_mode::read, std::move(existing_buf), sr, std::move(replacement_buf)); - data_locations = {{subrange_to_grid_box(sr), data_location::host}}; + data_locations = {{box(sr), data_location::host}}; } // get_buffer_data will race with pending transfers for the same subrange. In case there are pending transfers and a host buffer does not exist yet, // these transfers cannot easily be flushed here as creating a host buffer requires a templated context that knows about DataT. assert(std::none_of(m_scheduled_transfers[bid].begin(), m_scheduled_transfers[bid].end(), - [&](const transfer& t) { return subrange_to_grid_box(sr).intersectsWith(subrange_to_grid_box(t.sr)); })); + [&](const transfer& t) { return !box_intersection(box(sr), box(t.sr)).empty(); })); if(data_locations[0].second == data_location::host || data_locations[0].second == data_location::host_and_device) { return m_buffers.at(bid).host_buf.storage->get_data({m_buffers.at(bid).host_buf.get_local_offset(sr.offset), sr.range}, out_linearized); @@ -130,14 +130,11 @@ namespace detail { // Use faux host accesses to retain all data from the device (except what is going to be discarded anyway). // TODO: This could be made more efficient, currently it may cause multiple consecutive resizes. 
- GridRegion<3> retain_region = subrange_to_grid_box(subrange<3>{existing_buf.offset, existing_buf.storage->get_range()}); - if(!access::mode_traits::is_consumer(mode)) { - retain_region = GridRegion<3>::difference(retain_region, subrange_to_grid_box(subrange<3>{sr.offset, sr.range})); + region retain_region(subrange(existing_buf.offset, existing_buf.storage->get_range())); + if(!access::mode_traits::is_consumer(mode)) { retain_region = region_difference(retain_region, region(sr)); } + for(const subrange<3> sr : retain_region.get_boxes()) { + access_host_buffer_impl(bid, access_mode::read, sr); } - retain_region.scanByBoxes([&](const GridBox<3>& box) { - const auto sr = grid_box_to_subrange(box); - access_host_buffer_impl(bid, access_mode::read, subrange<3>{sr.offset, sr.range}); - }); // We now have all data "backed up" on the host, so we may deallocate the device buffer (via destructor). existing_buf = backing_buffer{}; @@ -242,24 +239,21 @@ namespace detail { const auto target_buffer_location = target_buffer.storage->get_type() == buffer_type::host_buffer ? data_location::host : data_location::device; - const auto coherent_box = subrange_to_grid_box(coherent_sr); + const auto coherent_box = box(coherent_sr); // If a previous buffer is provided, we may have to retain some or all of the existing data. - const GridRegion<3> retain_region = ([&]() { - GridRegion<3> result = coherent_box; - if(previous_buffer.is_allocated()) { - result = GridRegion<3>::merge(result, subrange_to_grid_box({previous_buffer.offset, previous_buffer.storage->get_range()})); - } - return result; + const region<3> retain_region = ([&]() { + std::vector> boxes{coherent_box}; + if(previous_buffer.is_allocated()) { boxes.push_back(subrange(previous_buffer.offset, previous_buffer.storage->get_range())); } + return region(std::move(boxes)); })(); // IIFE // Sanity check: Retain region must be at least as large as coherence box (and fully overlap). 
- assert(coherent_box.area() <= retain_region.area()); - assert(GridRegion<3>::difference(coherent_box, retain_region).empty()); + assert(coherent_box.get_area() <= retain_region.get_area()); + assert(region_difference(coherent_box, retain_region).empty()); // Also check that the new target buffer could actually fit the entire retain region. - assert((grid_box_to_subrange(retain_region.boundingBox()).offset >= target_buffer.offset) == id(true, true, true)); - assert((grid_box_to_subrange(retain_region.boundingBox()).offset + grid_box_to_subrange(retain_region.boundingBox()).range - <= target_buffer.offset + target_buffer.storage->get_range()) + assert((bounding_box(retain_region).get_offset() >= target_buffer.offset) == id(true, true, true)); + assert((bounding_box(retain_region).get_offset() + bounding_box(retain_region).get_range() <= target_buffer.offset + target_buffer.storage->get_range()) == id(true, true, true)); // Check whether we have any scheduled transfers that overlap with the requested subrange, and if so, apply them. @@ -271,15 +265,15 @@ namespace detail { if(detail::access::mode_traits::is_consumer(mode)) #endif { - GridRegion<3> updated_region; + std::vector> updated_region_boxes; std::vector remaining_transfers; auto& scheduled_buffer_transfers = m_scheduled_transfers[bid]; remaining_transfers.reserve(scheduled_buffer_transfers.size() / 2); for(auto& t : scheduled_buffer_transfers) { - auto t_region = subrange_to_grid_box(t.sr); + auto t_box = box(t.sr); // Check whether this transfer applies to the current request. - auto t_minus_coherent_region = GridRegion<3>::difference(t_region, coherent_box); + auto t_minus_coherent_region = region_difference(t_box, coherent_box); if(!t_minus_coherent_region.empty()) { // Check if transfer applies partially. // This might happen in certain situations, when two different commands partially overlap in their required buffer ranges. 
@@ -289,19 +283,19 @@ namespace detail { // NOTE: We currently assume that one of the requests will consume the FULL transfer. Only then we discard it. // This assumption is valid right now, as the graph generator will not consolidate adjacent pushes for two (or more) // separate commands. This might however change in the future. - if(t_minus_coherent_region != t_region) { + if(t_minus_coherent_region != t_box) { assert(detail::access::mode_traits::is_consumer(mode)); - auto intersection = GridRegion<3>::intersect(t_region, coherent_box); - remaining_region_after_transfers = GridRegion<3>::difference(remaining_region_after_transfers, intersection); + auto intersection = region(box_intersection(t_box, coherent_box)); // TODO this can be a box instead of a region! + remaining_region_after_transfers = region_difference(remaining_region_after_transfers, intersection); const auto element_size = m_buffer_infos.at(bid).element_size; - intersection.scanByBoxes([&](const GridBox<3>& box) { - auto sr = grid_box_to_subrange(box); + for(const auto& box : intersection.get_boxes()) { + auto sr = box.get_subrange(); // TODO can this temp buffer be avoided? auto tmp = make_uninitialized_payload(sr.range.size() * element_size); linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range}); target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer()); - updated_region = GridRegion<3>::merge(updated_region, box); - }); + updated_region_boxes.push_back(box); + } } // Transfer only applies partially, or not at all - which means we have to keep it around. remaining_transfers.emplace_back(std::move(t)); @@ -310,35 +304,35 @@ namespace detail { // Transfer applies fully. 
assert(detail::access::mode_traits::is_consumer(mode)); - remaining_region_after_transfers = GridRegion<3>::difference(remaining_region_after_transfers, t_region); + remaining_region_after_transfers = region_difference(remaining_region_after_transfers, t_box); target_buffer.storage->set_data({target_buffer.get_local_offset(t.sr.offset), t.sr.range}, t.linearized.get_pointer()); - updated_region = GridRegion<3>::merge(updated_region, t_region); + updated_region_boxes.push_back(t_box); } // The target buffer now has the newest data in this region. - m_newest_data_location.at(bid).update_region(updated_region, target_buffer_location); + m_newest_data_location.at(bid).update_region(region(std::move(updated_region_boxes)), target_buffer_location); scheduled_buffer_transfers = std::move(remaining_transfers); } if(!remaining_region_after_transfers.empty()) { - const auto maybe_retain_box = [&](const GridBox<3>& box) { + const auto maybe_retain_box = [&](const box<3>& box) { if(detail::access::mode_traits::is_consumer(mode)) { // If we are accessing the buffer using a consumer mode, we have to retain the full previous contents, otherwise... - const auto box_sr = grid_box_to_subrange(box); + const auto box_sr = box.get_subrange(); target_buffer.storage->copy( *previous_buffer.storage, previous_buffer.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); } else { // ...check if there are parts of the previous buffer that we are not going to overwrite (and thus have to retain). // If so, copy only those parts. 
- const auto remaining_region = GridRegion<3>::difference(box, coherent_box); - remaining_region.scanByBoxes([&](const GridBox<3>& small_box) { - const auto small_box_sr = grid_box_to_subrange(small_box); + const auto remaining_region = region_difference(box, coherent_box); + for(const auto& small_box : remaining_region.get_boxes()) { + const auto small_box_sr = small_box.get_subrange(); target_buffer.storage->copy(*previous_buffer.storage, previous_buffer.get_local_offset(small_box_sr.offset), target_buffer.get_local_offset(small_box_sr.offset), small_box_sr.range); - }); + } } }; - GridRegion<3> replicated_region; + std::vector> replicated_boxes; auto& buffer_data_locations = m_newest_data_location.at(bid); const auto data_locations = buffer_data_locations.get_region_values(remaining_region_after_transfers); for(auto& dl : data_locations) { @@ -354,21 +348,21 @@ namespace detail { // Copy from host, unless we are using a pure producer mode else if(dl.second == data_location::host && detail::access::mode_traits::is_consumer(mode)) { assert(m_buffers[bid].host_buf.is_allocated()); - const auto box_sr = grid_box_to_subrange(dl.first); + const auto box_sr = dl.first.get_subrange(); const auto& host_buf = m_buffers[bid].host_buf; target_buffer.storage->copy( *host_buf.storage, host_buf.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); - replicated_region = GridRegion<3>::merge(replicated_region, dl.first); + replicated_boxes.push_back(dl.first); } } else if(target_buffer.storage->get_type() == buffer_type::host_buffer) { // Copy from device, unless we are using a pure producer mode if(dl.second == data_location::device && detail::access::mode_traits::is_consumer(mode)) { assert(m_buffers[bid].device_buf.is_allocated()); - const auto box_sr = grid_box_to_subrange(dl.first); + const auto box_sr = dl.first.get_subrange(); const auto& device_buf = m_buffers[bid].device_buf; target_buffer.storage->copy( *device_buf.storage, 
device_buf.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); - replicated_region = GridRegion<3>::merge(replicated_region, dl.first); + replicated_boxes.push_back(dl.first); } // Copy from host in case we are resizing an existing buffer else if((dl.second == data_location::host || dl.second == data_location::host_and_device) && previous_buffer.is_allocated()) { @@ -378,7 +372,7 @@ namespace detail { } // Finally, remember the fact that we replicated some regions to the new target location. - buffer_data_locations.update_region(replicated_region, data_location::host_and_device); + buffer_data_locations.update_region(region(std::move(replicated_boxes)), data_location::host_and_device); } if(detail::access::mode_traits::is_producer(mode)) { m_newest_data_location.at(bid).update_region(coherent_box, target_buffer_location); } diff --git a/src/buffer_transfer_manager.cc b/src/buffer_transfer_manager.cc index 9278c2336..c0f7fddc7 100644 --- a/src/buffer_transfer_manager.cc +++ b/src/buffer_transfer_manager.cc @@ -64,7 +64,7 @@ namespace detail { assert(pkg.get_command_type() == command_type::await_push); const auto& data = std::get(pkg.data); - GridRegion<3> expected_region = data.region; + const auto &expected_region = data.region; std::shared_ptr t_handle; // Check to see if we have (fully) received the data already diff --git a/src/distributed_graph_generator.cc b/src/distributed_graph_generator.cc index f08e24072..c753a8aca 100644 --- a/src/distributed_graph_generator.cc +++ b/src/distributed_graph_generator.cc @@ -30,8 +30,8 @@ void distributed_graph_generator::add_buffer(const buffer_id bid, const int dims std::piecewise_construct, std::tuple{bid}, std::tuple{region_map{range, dims}, region_map{range, dims}}); // Mark contents as available locally (= don't generate await push commands) and fully replicated (= don't generate push commands). // This is required when tasks access host-initialized or uninitialized buffers. 
- m_buffer_states.at(bid).local_last_writer.update_region(subrange_to_grid_box({id<3>(), range}), m_epoch_for_new_commands); - m_buffer_states.at(bid).replicated_regions.update_region(subrange_to_grid_box({id<3>(), range}), node_bitset{}.set()); + m_buffer_states.at(bid).local_last_writer.update_region(subrange<3>({}, range), m_epoch_for_new_commands); + m_buffer_states.at(bid).replicated_regions.update_region(subrange<3>({}, range), node_bitset{}.set()); } // We simply split in the first dimension for now @@ -81,7 +81,7 @@ static std::vector> split_equal(const chunk<3>& full_chunk, const range return result; } -using buffer_requirements_map = std::unordered_map>>; +using buffer_requirements_map = std::unordered_map>>; static buffer_requirements_map get_buffer_requirements_for_mapped_access(const task& tsk, subrange<3> sr, const range<3> global_size) { buffer_requirements_map result; @@ -169,9 +169,9 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) const auto chunks_per_node = std::max(1, chunks.size() / m_num_nodes); // Union of all per-buffer writes on this node, used to determine which parts of a buffer are fresh/stale later on. - std::unordered_map> per_buffer_local_writes; + std::unordered_map> per_buffer_local_writes; // In case we need to push a region that is overwritten in the same task, we have to defer updating the last writer. - std::unordered_map, command_id>>> per_buffer_last_writer_update_list; + std::unordered_map, command_id>>> per_buffer_last_writer_update_list; // Buffers that currently are in a pending reduction state will receive a new buffer state after a reduction has been generated. std::unordered_map post_reduction_buffer_states; @@ -184,6 +184,9 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // TODO: Revisit this at some point. 
const node_id reduction_initializer_nid = 0; + const box<3> empty_box({0, 0, 0}, {0, 0, 0}); + const box<3> scalar_box({0, 0, 0}, {1, 1, 1}); + // Iterate over all chunks, distinguish between local / remote chunks and normal / reduction access. // // Normal buffer access: @@ -213,7 +216,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) assert(requirements[reduction.bid].count(pmode) == 0); // task_manager verifies that there are no reduction <-> write-access conflicts } #endif - requirements[reduction.bid][rmode] = GridRegion<3>{{1, 1, 1}}; + requirements[reduction.bid][rmode] = scalar_box; } abstract_command* cmd = nullptr; @@ -285,22 +288,24 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(detail::access::mode_traits::is_consumer(mode)) { if(is_local_chunk) { // Store the read access for determining anti-dependencies later on - m_command_buffer_reads[cmd->get_cid()][bid] = GridRegion<3>::merge(m_command_buffer_reads[cmd->get_cid()][bid], req); + m_command_buffer_reads[cmd->get_cid()][bid] = region_union(m_command_buffer_reads[cmd->get_cid()][bid], req); } if(is_local_chunk && !is_pending_reduction) { const auto local_sources = buffer_state.local_last_writer.get_region_values(req); - GridRegion<3> missing_parts; + std::vector> missing_part_boxes; for(const auto& [box, wcs] : local_sources) { + if(box.empty()) continue; if(!wcs.is_fresh()) { - missing_parts = GridRegion<3>::merge(missing_parts, box); + missing_part_boxes.push_back(box); continue; } m_cdag.add_dependency(cmd, m_cdag.get(wcs), dependency_kind::true_dep, dependency_origin::dataflow); } // There is data we don't yet have locally. Generate an await push command for it. 
- if(!missing_parts.empty()) { + if(!missing_part_boxes.empty()) { + const region missing_parts(std::move(missing_part_boxes)); assert(m_num_nodes > 1); auto* const ap_cmd = create_command(bid, 0, trid, missing_parts); m_cdag.add_dependency(cmd, ap_cmd, dependency_kind::true_dep, dependency_origin::dataflow); @@ -322,7 +327,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // Generate separate push command for each last writer command for now, // possibly even multiple for partially already-replicated data. // TODO: Can and/or should we consolidate? - auto* const push_cmd = create_command(bid, 0, nid, trid, grid_box_to_subrange(replicated_box)); + auto* const push_cmd = create_command(bid, 0, nid, trid, replicated_box.get_subrange()); assert(!utils::isa(m_cdag.get(wcs)) && "Attempting to push non-owned data?!"); m_cdag.add_dependency(push_cmd, m_cdag.get(wcs), dependency_kind::true_dep, dependency_origin::dataflow); generated_pushes.push_back(push_cmd); @@ -343,7 +348,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // generating anti-dependencies around this requirement. This might not be valid if (multivariate) reductions ever operate on regions. 
if(!generate_reduction) { generate_anti_dependencies(tsk.get_id(), bid, buffer_state.local_last_writer, req, cmd); } - per_buffer_local_writes[bid] = GridRegion<3>::merge(per_buffer_local_writes[bid], req); + per_buffer_local_writes[bid] = region_union(per_buffer_local_writes[bid], req); per_buffer_last_writer_update_list[bid].push_back({req, cmd->get_cid()}); } } @@ -351,10 +356,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(generate_reduction) { const auto& reduction = *buffer_state.pending_reduction; - const GridBox<3> box{GridPoint<3>{1, 1, 1}}; - const subrange<3> sr{{}, {1, 1, 1}}; - - const auto local_last_writer = buffer_state.local_last_writer.get_region_values(box); + const auto local_last_writer = buffer_state.local_last_writer.get_region_values(scalar_box); assert(local_last_writer.size() == 1); if(is_local_chunk) { @@ -365,35 +367,35 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) m_cdag.add_dependency(reduce_cmd, m_cdag.get(local_last_writer[0].second), dependency_kind::true_dep, dependency_origin::dataflow); } - auto* const ap_cmd = create_command(bid, reduction.rid, trid, subrange_to_grid_box(sr)); + auto* const ap_cmd = create_command(bid, reduction.rid, trid, scalar_box.get_subrange()); m_cdag.add_dependency(reduce_cmd, ap_cmd, dependency_kind::true_dep, dependency_origin::dataflow); generate_epoch_dependencies(ap_cmd); m_cdag.add_dependency(cmd, reduce_cmd, dependency_kind::true_dep, dependency_origin::dataflow); // Reduction command becomes the last writer (this may be overriden if this task also writes to the reduction buffer) - post_reduction_buffer_states.at(bid).local_last_writer.update_box(box, reduce_cmd->get_cid()); + post_reduction_buffer_states.at(bid).local_last_writer.update_box(scalar_box, reduce_cmd->get_cid()); } else { // Push an empty range if we don't have any fresh data on this node const bool notification_only = 
!local_last_writer[0].second.is_fresh(); - const auto push_sr = notification_only ? subrange<3>{{}, {0, 0, 0}} : sr; + const auto push_box = notification_only ? empty_box : scalar_box; - auto* const push_cmd = create_command(bid, reduction.rid, nid, trid, push_sr); + auto* const push_cmd = create_command(bid, reduction.rid, nid, trid, push_box.get_subrange()); generated_pushes.push_back(push_cmd); if(notification_only) { generate_epoch_dependencies(push_cmd); } else { - m_command_buffer_reads[push_cmd->get_cid()][bid] = GridRegion<3>::merge(m_command_buffer_reads[push_cmd->get_cid()][bid], box); + m_command_buffer_reads[push_cmd->get_cid()][bid] = region_union(m_command_buffer_reads[push_cmd->get_cid()][bid], scalar_box); m_cdag.add_dependency(push_cmd, m_cdag.get(local_last_writer[0].second), dependency_kind::true_dep, dependency_origin::dataflow); } // Mark the reduction result as replicated so we don't generate data transfers to this node // TODO: We need a way of updating regions in place! E.g. apply_to_values(box, callback) - const auto replicated_box = post_reduction_buffer_states.at(bid).replicated_regions.get_region_values(box); + const auto replicated_box = post_reduction_buffer_states.at(bid).replicated_regions.get_region_values(scalar_box); assert(replicated_box.size() == 1); for(const auto& [_, nodes] : replicated_box) { - post_reduction_buffer_states.at(bid).replicated_regions.update_box(box, node_bitset{nodes}.set(nid)); + post_reduction_buffer_states.at(bid).replicated_regions.update_box(scalar_box, node_bitset{nodes}.set(nid)); } } } @@ -447,7 +449,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // These can happen in rare cases, when the node that pushes a buffer range also writes to that range within the same task. // We cannot do this while generating the push command, as we may not have the writing command recorded at that point. 
for(auto* push_cmd : generated_pushes) { - const auto last_writers = m_buffer_states.at(push_cmd->get_bid()).local_last_writer.get_region_values(subrange_to_grid_box(push_cmd->get_range())); + const auto last_writers = m_buffer_states.at(push_cmd->get_bid()).local_last_writer.get_region_values(region(push_cmd->get_range())); for(const auto& [box, wcs] : last_writers) { assert(!box.empty()); // If we want to push it it cannot be empty @@ -477,14 +479,16 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // Determine which local data is fresh/stale based on task-level writes. auto requirements = get_buffer_requirements_for_mapped_access(tsk, subrange<3>(tsk.get_global_offset(), tsk.get_global_size()), tsk.get_global_size()); for(auto& [bid, reqs_by_mode] : requirements) { - GridRegion<3> global_writes; + std::vector> global_write_boxes; for(const auto mode : access::producer_modes) { if(reqs_by_mode.count(mode) == 0) continue; - global_writes = GridRegion<3>::merge(global_writes, reqs_by_mode.at(mode)); + const auto& by_mode = reqs_by_mode.at(mode); + global_write_boxes.insert(global_write_boxes.end(), by_mode.get_boxes().begin(), by_mode.get_boxes().end()); } + const region global_writes(std::move(global_write_boxes)); const auto& local_writes = per_buffer_local_writes[bid]; - assert(GridRegion<3>::difference(local_writes, global_writes).empty()); // Local writes have to be a subset of global writes - const auto remote_writes = GridRegion<3>::difference(global_writes, local_writes); + assert(region_difference(local_writes, global_writes).empty()); // Local writes have to be a subset of global writes + const auto remote_writes = region_difference(global_writes, local_writes); auto& buffer_state = m_buffer_states.at(bid); // TODO: We need a way of updating regions in place! E.g. 
apply_to_values(box, callback) @@ -501,7 +505,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) } void distributed_graph_generator::generate_anti_dependencies( - task_id tid, buffer_id bid, const region_map& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd) { + task_id tid, buffer_id bid, const region_map& last_writers_map, const region<3>& write_req, abstract_command* write_cmd) { const auto last_writers = last_writers_map.get_region_values(write_req); for(const auto& [box, wcs] : last_writers) { auto* const last_writer_cmd = m_cdag.get(static_cast(wcs)); @@ -523,7 +527,7 @@ void distributed_graph_generator::generate_anti_dependencies( const auto& command_reads = command_reads_it->second; // The task might be a dependent because of another buffer if(const auto buffer_reads_it = command_reads.find(bid); buffer_reads_it != command_reads.end()) { - if(!GridRegion<3>::intersect(write_req, buffer_reads_it->second).empty()) { + if(!region_intersection(write_req, buffer_reads_it->second).empty()) { has_successors = true; m_cdag.add_dependency(write_cmd, cmd, dependency_kind::anti_dep, dependency_origin::dataflow); } diff --git a/src/print_graph.cc b/src/print_graph.cc index 5ecc33812..0bba72cb4 100644 --- a/src/print_graph.cc +++ b/src/print_graph.cc @@ -44,9 +44,9 @@ void format_requirements(std::string& label, const reduction_list& reductions, c const access_mode reduction_init_mode) { for(const auto& [rid, bid, buffer_name, init_from_buffer] : reductions) { auto rmode = init_from_buffer ? reduction_init_mode : cl::sycl::access::mode::discard_write; - const auto req = GridRegion<3>{{1, 1, 1}}; + const region scalar_region(box<3>({0, 0, 0}, {1, 1, 1})); const std::string bl = get_buffer_label(bid, buffer_name); - fmt::format_to(std::back_inserter(label), "
(R{}) {} {} {}", rid, detail::access::mode_traits::name(rmode), bl, req); + fmt::format_to(std::back_inserter(label), "
(R{}) {} {} {}", rid, detail::access::mode_traits::name(rmode), bl, scalar_region); } for(const auto& [bid, buffer_name, mode, req] : accesses) { @@ -111,12 +111,12 @@ std::string get_command_label(const node_id local_nid, const command_record& cmd if(cmd.epoch_action == epoch_action::shutdown) { label += " (shutdown)"; } } break; case command_type::execution: { - fmt::format_to(std::back_inserter(label), "execution {}", subrange_to_grid_box(cmd.execution_range.value())); + fmt::format_to(std::back_inserter(label), "execution {}", cmd.execution_range.value()); } break; case command_type::push: { add_reduction_id_if_reduction(); - fmt::format_to(std::back_inserter(label), "push transfer {} to N{}
B{} {}", // - cmd.transfer_id.value(), cmd.target.value(), buffer_label, subrange_to_grid_box(cmd.push_range.value())); + fmt::format_to(std::back_inserter(label), "push transfer {} to N{}
B{} {}", cmd.transfer_id.value(), cmd.target.value(), buffer_label, + cmd.push_range.value()); } break; case command_type::await_push: { add_reduction_id_if_reduction(); @@ -124,7 +124,8 @@ std::string get_command_label(const node_id local_nid, const command_record& cmd cmd.transfer_id.value(), buffer_label, cmd.await_region.value()); } break; case command_type::reduction: { - fmt::format_to(std::back_inserter(label), "reduction R{}
{} {}", cmd.reduction_id.value(), buffer_label, GridRegion<3>{{1, 1, 1}}); + const region scalar_region(box<3>({0, 0, 0}, {1, 1, 1})); + fmt::format_to(std::back_inserter(label), "reduction R{}
{} {}", cmd.reduction_id.value(), buffer_label, scalar_region); } break; case command_type::horizon: { label += "horizon"; diff --git a/src/print_utils.cc b/src/print_utils.cc deleted file mode 100644 index fc7c9cd51..000000000 --- a/src/print_utils.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "print_utils.h" - -#include - -#include - -namespace celerity { -namespace detail { - - std::ostream& print_chunk3(std::ostream& os, chunk<3> chnk3) { - auto start = chnk3.offset; - auto end = chnk3.offset + chnk3.range; - auto size = chnk3.global_size; - return os << fmt::format("[{},{},{}] - [{},{},{}] : {{{},{},{}}}", start[0], start[1], start[2], end[0], end[1], end[2], size[0], size[1], size[2]); - } - - std::ostream& print_subrange3(std::ostream& os, subrange<3> subr3) { - auto start = subr3.offset; - auto end = subr3.offset + subr3.range; - return os << fmt::format("[{},{},{}] - [{},{},{}]", start[0], start[1], start[2], end[0], end[1], end[2]); - } - -} // namespace detail -} // namespace celerity diff --git a/src/recorders.cc b/src/recorders.cc index 9928141a6..1b7fae260 100644 --- a/src/recorders.cc +++ b/src/recorders.cc @@ -100,7 +100,7 @@ std::optional get_target(const abstract_command& cmd) { return {}; } -std::optional> get_await_region(const abstract_command& cmd) { +std::optional> get_await_region(const abstract_command& cmd) { if(const auto* await_push_cmd = dynamic_cast(&cmd)) return await_push_cmd->get_region(); return {}; } diff --git a/src/task.cc b/src/task.cc index b552879b1..c8cf40480 100644 --- a/src/task.cc +++ b/src/task.cc @@ -33,17 +33,17 @@ namespace detail { return subrange<3>{}; } - GridRegion<3> buffer_access_map::get_mode_requirements( + region<3> buffer_access_map::get_mode_requirements( const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { - GridRegion<3> result; + std::vector> boxes; for(size_t i = 0; i < m_accesses.size(); ++i) { if(m_accesses[i].first != bid || 
m_accesses[i].second->get_access_mode() != mode) continue; - result = GridRegion<3>::merge(result, get_requirements_for_nth_access(i, kernel_dims, sr, global_size)); + boxes.push_back(get_requirements_for_nth_access(i, kernel_dims, sr, global_size)); } - return result; + return region(std::move(boxes)); } - GridBox<3> buffer_access_map::get_requirements_for_nth_access( + box<3> buffer_access_map::get_requirements_for_nth_access( const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { const auto& [_, rm] = m_accesses[n]; @@ -56,7 +56,7 @@ namespace detail { case 3: req = apply_range_mapper<3>(rm.get(), chunk_cast<3>(chnk)); break; default: assert(!"Unreachable"); } - return subrange_to_grid_box(req); + return req; } void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) { diff --git a/src/task_manager.cc b/src/task_manager.cc index 3f8ddce6a..456a74f9e 100644 --- a/src/task_manager.cc +++ b/src/task_manager.cc @@ -17,7 +17,7 @@ namespace detail { void task_manager::add_buffer(buffer_id bid, const int dims, const range<3>& range, bool host_initialized) { m_buffers_last_writers.emplace(std::piecewise_construct, std::tuple{bid}, std::tuple{range, dims}); - if(host_initialized) { m_buffers_last_writers.at(bid).update_region(subrange_to_grid_box(subrange<3>({}, range)), m_epoch_for_new_tasks); } + if(host_initialized) { m_buffers_last_writers.at(bid).update_region(subrange<3>({}, range), m_epoch_for_new_tasks); } } const task* task_manager::find_task(task_id tid) const { return m_task_buffer.find_task(tid); } @@ -53,14 +53,15 @@ namespace detail { void task_manager::await_epoch(task_id epoch) { m_latest_epoch_reached.await(epoch); } - GridRegion<3> get_requirements(const task& tsk, buffer_id bid, const std::vector modes) { + region<3> get_requirements(const task& tsk, buffer_id bid, const std::vector& modes) { const auto& access_map = tsk.get_buffer_access_map(); const 
subrange<3> full_range{tsk.get_global_offset(), tsk.get_global_size()}; - GridRegion<3> result; + std::vector> boxes; for(auto m : modes) { - result = GridRegion<3>::merge(result, access_map.get_mode_requirements(bid, m, tsk.get_dimensions(), full_range, tsk.get_global_size())); + const auto req = access_map.get_mode_requirements(bid, m, tsk.get_dimensions(), full_range, tsk.get_global_size()); + boxes.insert(boxes.end(), req.get_boxes().begin(), req.get_boxes().end()); } - return result; + return region(std::move(boxes)); } void task_manager::compute_dependencies(task& tsk) { @@ -73,6 +74,8 @@ namespace detail { buffers.emplace(reduction.bid); } + const box<3> scalar_box({0, 0, 0}, {1, 1, 1}); + for(const auto bid : buffers) { const auto modes = access_map.get_access_modes(bid); @@ -92,7 +95,7 @@ namespace detail { // Determine reader dependencies if(std::any_of(modes.cbegin(), modes.cend(), detail::access::mode_traits::is_consumer) || (reduction.has_value() && reduction->init_from_buffer)) { auto read_requirements = get_requirements(tsk, bid, {detail::access::consumer_modes.cbegin(), detail::access::consumer_modes.cend()}); - if(reduction.has_value()) { read_requirements = GridRegion<3>::merge(read_requirements, GridRegion<3>{{1, 1, 1}}); } + if(reduction.has_value()) { read_requirements = region_union(read_requirements, scalar_box); } const auto last_writers = m_buffers_last_writers.at(bid).get_region_values(read_requirements); for(auto& p : last_writers) { @@ -107,7 +110,7 @@ namespace detail { // Update last writers and determine anti-dependencies if(std::any_of(modes.cbegin(), modes.cend(), detail::access::mode_traits::is_producer) || reduction.has_value()) { auto write_requirements = get_requirements(tsk, bid, {detail::access::producer_modes.cbegin(), detail::access::producer_modes.cend()}); - if(reduction.has_value()) { write_requirements = GridRegion<3>::merge(write_requirements, GridRegion<3>{{1, 1, 1}}); } + if(reduction.has_value()) { write_requirements 
= region_union(write_requirements, scalar_box); } if(write_requirements.empty()) continue; const auto last_writers = m_buffers_last_writers.at(bid).get_region_values(write_requirements); @@ -128,7 +131,7 @@ namespace detail { const auto dependent_read_requirements = get_requirements(*dependent.node, bid, {detail::access::consumer_modes.cbegin(), detail::access::consumer_modes.cend()}); // Only add an anti-dependency if we are really writing over the region read by this task - if(!GridRegion<3>::intersect(write_requirements, dependent_read_requirements).empty()) { + if(!region_intersection(write_requirements, dependent_read_requirements).empty()) { add_dependency(tsk, *dependent.node, dependency_kind::anti_dep, dependency_origin::dataflow); has_anti_dependents = true; } diff --git a/src/worker_job.cc b/src/worker_job.cc index 9ef1abcd2..046932b63 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -158,7 +158,7 @@ namespace detail { access_infos.reserve(access_map.get_num_accesses()); for(size_t i = 0; i < access_map.get_num_accesses(); ++i) { const auto [bid, mode] = access_map.get_nth_access(i); - const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + const auto sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto info = m_buffer_mngr.access_host_buffer(bid, mode, sr); access_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr}); } @@ -212,7 +212,7 @@ namespace detail { for(size_t i = 0; i < access_map.get_num_accesses(); ++i) { const auto [bid, mode] = access_map.get_nth_access(i); - const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + const auto sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, 
tsk->get_global_size()).get_subrange(); try { const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr); @@ -262,7 +262,7 @@ namespace detail { if(oob_max != id<3>{1, 1, 1}) { const auto& access_map = tsk->get_buffer_access_map(); const auto acc_sr = - grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are " "outside of mapped subrange {}", diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index 277497d85..15eec65fd 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -162,7 +162,7 @@ namespace detail { // #if __SYCL_DEVICE_ONLY__ did get rid of the segfault, but caused the test to fail with a heap corruption at runtime. Instead, replacing id // with size_t seems to resolve the problem. - const auto range = range_cast(celerity::range<3>(2, 3, 4)); + const auto range = test_utils::truncate_range({2, 3, 4}); auto& bm = accessor_fixture::get_buffer_manager(); auto bid = bm.template register_buffer(range_cast<3>(range)); @@ -170,15 +170,15 @@ namespace detail { auto sr = subrange<3>({}, range_cast<3>(range)); // this kernel initializes the buffer what will be read after. 
- auto acc_write = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); + auto acc_write = accessor_fixture::template get_device_accessor(bid, range, {}); test_utils::run_parallel_for>(accessor_fixture::get_device_queue().get_sycl_queue(), - range_cast(range), {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); + range, {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); SECTION("for device buffers") { - auto acc_read = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); - auto acc = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); + auto acc_read = accessor_fixture::template get_device_accessor(bid, range, {}); + auto acc = accessor_fixture::template get_device_accessor(bid, range, {}); test_utils::run_parallel_for>( - accessor_fixture::get_device_queue().get_sycl_queue(), range_cast(range), {}, [=](celerity::item item) { + accessor_fixture::get_device_queue().get_sycl_queue(), range, {}, [=](celerity::item item) { size_t i = item[0]; size_t j = item[1]; if constexpr(Dims == 2) { @@ -191,8 +191,8 @@ namespace detail { } SECTION("for host buffers") { - auto acc_read = accessor_fixture::template get_host_accessor(bid, range_cast(range), {}); - auto acc = accessor_fixture::template get_host_accessor(bid, range_cast(range), {}); + auto acc_read = accessor_fixture::template get_host_accessor(bid, range, {}); + auto acc = accessor_fixture::template get_host_accessor(bid, range, {}); for(size_t i = 0; i < range[0]; i++) { for(size_t j = 0; j < range[1]; j++) { for(size_t k = 0; k < (Dims == 2 ? 
1 : range[2]); k++) { @@ -207,8 +207,8 @@ namespace detail { } typename accessor_fixture::access_target tgt = accessor_fixture::access_target::host; - bool acc_check = accessor_fixture::template buffer_reduce>(bid, tgt, range_cast(range), - {}, true, [range = range_cast(range)](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); + bool acc_check = accessor_fixture::template buffer_reduce>(bid, tgt, range, + {}, true, [range = range](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); REQUIRE(acc_check); } @@ -257,7 +257,7 @@ namespace detail { buffer verify_buf{&verified, 1}; q.submit([&](handler& cgh) { // access with offset == buffer range just to mess with things - const auto offset = id_cast<1>(test_buf.get_range()); + const auto offset = id(test_buf.get_range()); const auto test_acc = test_buf.get_access(cgh, [=](chunk<1>) { return subrange<1>{offset, 0}; }); const auto verify_acc = verify_buf.get_access(cgh, one_to_one{}); cgh.parallel_for>(range<1>{1}, [=](item<1>) { @@ -654,10 +654,10 @@ namespace detail { #if !CELERITY_ACCESSOR_BOUNDARY_CHECK SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0"); #endif - buffer buff(range_cast(range<3>{10, 20, 30})); - const auto accessible_sr = subrange_cast(subrange<3>{{5, 10, 15}, {1, 2, 3}}); - const auto oob_idx_lo = id_cast(id<3>{1, 2, 3}); - const auto oob_idx_hi = id_cast(id<3>{7, 13, 25}); + buffer buff(test_utils::truncate_range({10, 20, 30})); + const auto accessible_sr = test_utils::truncate_subrange({{5, 10, 15}, {1, 2, 3}}); + const auto oob_idx_lo = test_utils::truncate_id({1, 2, 3}); + const auto oob_idx_hi = test_utils::truncate_id({7, 13, 25}); // we need to be careful about the orderign of the construction and destruction // of the Celerity queue and the log capturing utility here diff --git a/test/backend_tests.cc b/test/backend_tests.cc index 93ddd74bd..86cb184c1 100644 --- a/test/backend_tests.cc +++ 
b/test/backend_tests.cc @@ -44,11 +44,11 @@ void verify_copied_linear_ids(const size_t* host_buf, const range& source_ template struct copy_parameters { - range source_range = range_cast(range<3>(5, 7, 11)); - range target_range = range_cast(range<3>(13, 17, 19)); - range copy_range = range_cast(range<3>(2, 4, 8)); - id source_offset = id_cast(id<3>(2, 2, 2)); - id target_offset = id_cast(id<3>(3, 5, 7)); + range source_range = test_utils::truncate_range({5, 7, 11}); + range target_range = test_utils::truncate_range({13, 17, 19}); + range copy_range = test_utils::truncate_range({2, 4, 8}); + id source_offset = test_utils::truncate_id({2, 2, 2}); + id target_offset = test_utils::truncate_id({3, 5, 7}); }; template diff --git a/test/graph_gen_granularity_tests.cc b/test/graph_gen_granularity_tests.cc index 1a87c44de..9036f0677 100644 --- a/test/graph_gen_granularity_tests.cc +++ b/test/graph_gen_granularity_tests.cc @@ -71,13 +71,13 @@ TEMPLATE_TEST_CASE_SIG("distributed_graph_generator does not create empty chunks task_id tid = -1; SECTION("for simple tasks") { - task_range = range_cast(range<3>(2, 2, 2)); + task_range = truncate_range({2, 2, 2}); tid = dctx.device_compute>(task_range).submit(); } SECTION("for nd-range tasks") { - task_range = range_cast(range<3>(16, 2, 2)); - const auto local_range = range_cast(range<3>(8, 1, 1)); + task_range = truncate_range({16, 2, 2}); + const auto local_range = truncate_range({8, 1, 1}); tid = dctx.device_compute>(nd_range(task_range, local_range)).submit(); } diff --git a/test/graph_generation_tests.cc b/test/graph_generation_tests.cc index d44c730c2..65e58b31b 100644 --- a/test/graph_generation_tests.cc +++ b/test/graph_generation_tests.cc @@ -66,7 +66,7 @@ TEST_CASE("isa<> RTTI helper correctly handles command hierarchies", "[rtti][com REQUIRE(utils::isa(hec)); auto* const pc = cdag.create(0, 0, 0, 0, subrange<3>{}); REQUIRE(utils::isa(pc)); - auto* const apc = cdag.create(0, 0, 0, GridRegion<3>{}); + auto* const apc = 
cdag.create(0, 0, 0, region<3>{}); REQUIRE(utils::isa(apc)); } diff --git a/test/integration/backend.cc b/test/integration/backend.cc index 0e3a8d09a..557cced7a 100644 --- a/test/integration/backend.cc +++ b/test/integration/backend.cc @@ -6,12 +6,41 @@ std::abort(); \ } +template +celerity::range truncate_range(const celerity::range<3>& r3) { + celerity::range r = celerity::detail::zeros; + for(int d = 0; d < Dims; ++d) { + r[d] = r3[d]; + } + return r; +} + +template +celerity::id truncate_id(const celerity::id<3>& i3) { + celerity::id i; + for(int d = 0; d < Dims; ++d) { + i[d] = i3[d]; + } + return i; +} + +template +celerity::subrange truncate_subrange(const celerity::subrange<3>& sr3) { + celerity::subrange sr; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = sr3.offset[d]; + sr.range[d] = sr3.range[d]; + } + return sr; +} + + template struct kernel_name {}; template void test_copy(celerity::distr_queue& q) { - celerity::buffer buf(celerity::detail::range_cast(celerity::range<3>{5, 7, 9})); + celerity::buffer buf(truncate_range({5, 7, 9})); // Initialize on device q.submit([&](celerity::handler& cgh) { @@ -20,7 +49,7 @@ void test_copy(celerity::distr_queue& q) { }); // Check and modify partially on host - const auto sr = celerity::detail::subrange_cast(celerity::subrange<3>{{1, 2, 3}, {3, 4, 5}}); + const auto sr = truncate_subrange({{1, 2, 3}, {3, 4, 5}}); const auto sr3 = celerity::detail::subrange_cast<3>(sr); q.submit([&](celerity::handler& cgh) { celerity::accessor acc{buf, cgh, celerity::access::fixed{sr}, celerity::read_write_host_task}; @@ -28,10 +57,10 @@ void test_copy(celerity::distr_queue& q) { for(size_t k = 0; k < sr3.range[0]; ++k) { for(size_t j = 0; j < sr3.range[1]; ++j) { for(size_t i = 0; i < sr3.range[2]; ++i) { - const celerity::id<3> idx{sr3.offset[0] + k, sr3.offset[1] + j, sr3.offset[2] + i}; - const auto linear_id = celerity::detail::get_linear_index(buf.get_range(), celerity::detail::id_cast(idx)); - 
ASSERT(acc[celerity::detail::id_cast(idx)] == linear_id); - acc[celerity::detail::id_cast(idx)] *= 2; + const auto idx = truncate_id({sr3.offset[0] + k, sr3.offset[1] + j, sr3.offset[2] + i}); + const auto linear_id = celerity::detail::get_linear_index(buf.get_range(), idx); + ASSERT(acc[idx] == linear_id); + acc[idx] *= 2; } } } @@ -52,14 +81,13 @@ void test_copy(celerity::distr_queue& q) { for(size_t k = 0; k < r3[0]; ++k) { for(size_t j = 0; j < r3[1]; ++j) { for(size_t i = 0; i < r3[2]; ++i) { - const celerity::id<3> idx{k, j, i}; - const auto is_in_sr = - ((idx >= sr3.offset == celerity::id<3>(true, true, true)) && (idx < sr3.offset + sr3.range == celerity::id<3>(true, true, true))); - const auto linear_id = celerity::detail::get_linear_index(buf.get_range(), celerity::detail::id_cast(idx)); + const auto idx = truncate_id({k, j, i}); + const auto is_in_sr = (celerity::detail::all_true(idx >= sr.offset) && celerity::detail::all_true(idx < sr.offset + sr.range)); + const auto linear_id = celerity::detail::get_linear_index(buf.get_range(), idx); if(is_in_sr) { - ASSERT(acc[celerity::detail::id_cast(idx)] == 2 * linear_id + 1); + ASSERT(acc[idx] == 2 * linear_id + 1); } else { - ASSERT(acc[celerity::detail::id_cast(idx)] == linear_id + 1); + ASSERT(acc[idx] == linear_id + 1); } } } diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc index b09a570d4..3de14a09f 100644 --- a/test/print_graph_tests.cc +++ b/test/print_graph_tests.cc @@ -43,11 +43,11 @@ TEST_CASE("task-graph printing is unchanged", "[print_graph][task-graph]") { // replace the `expected` value with the new dot graph. const std::string expected = "digraph G {label=\"Task Graph\" 0[shape=ellipse label=epoch>];1[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
discard_write B1 {[[0,0,0] - [1,1,1]]}>];0->1[color=orchid];2[shape=box style=rounded " - "label=device-compute [0,0,0] - [64,1,1]
discard_write B0 {[[0,0,0] - " - "[64,1,1]]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
(R1) " - "read_write B1 {[[0,0,0] - [1,1,1]]}
read B0 {[[0,0,0] - [64,1,1]]}>];1->3[];2->3[];4[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
read B1 {[[0,0,0] - [1,1,1]]}>];3->4[];}"; + "
device-compute [0,0,0] - [64,1,1]
discard_write B1 {[0,0,0] - [1,1,1]}>];0->1[color=orchid];2[shape=box style=rounded " + "label=device-compute [0,0,0] - [64,1,1]
discard_write B0 {[0,0,0] - " + "[64,1,1]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
(R1) " + "read_write B1 {[0,0,0] - [1,1,1]}
read B0 {[0,0,0] - [64,1,1]}>];1->3[];2->3[];4[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
read B1 {[0,0,0] - [1,1,1]}>];3->4[];}"; CHECK(print_task_graph(tt.trec) == expected); } @@ -78,17 +78,17 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]") const std::string expected = "digraph G{label=\"Command Graph\" subgraph cluster_id_0_0{label=<T0 (epoch)>;color=darkgray;id_0_0[label=epoch> fontcolor=black shape=box];}subgraph cluster_id_0_1{label=<T1 \"reduce_8\" " - "(device-compute)>;color=darkgray;id_0_1[label=execution [[0,0,0] - [1,1,1]]
(R1) discard_write B0 {[[0,0,0] - " - "[1,1,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 \"consume_9\" " - "(device-compute)>;color=darkgray;id_0_2[label=execution [[0,0,0] - [1,1,1]]
read B0 {[[0,0,0] - " - "[1,1,1]]}
read_write B0 {[[0,0,0] - [1,1,1]]}
write B0 {[[0,0,0] - [1,1,1]]}> fontcolor=black " + "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] - [1,1,1]
(R1) discard_write B0 {[0,0,0] - " + "[1,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 \"consume_9\" " + "(device-compute)>;color=darkgray;id_0_2[label=execution [0,0,0] - [1,1,1]
read B0 {[0,0,0] - " + "[1,1,1]}
read_write B0 {[0,0,0] - [1,1,1]}
write B0 {[0,0,0] - [1,1,1]}> fontcolor=black " "shape=box];}id_0_0->id_0_1[color=orchid];id_0_3->id_0_2[];id_0_5->id_0_2[color=limegreen];id_0_6->id_0_2[color=limegreen];id_0_7->id_0_2[color=" - "limegreen];id_0_3[label=reduction R1
B0 {[[0,0,0] - [1,1,1]]}> fontcolor=black " - "shape=ellipse];id_0_1->id_0_3[];id_0_4->id_0_3[];id_0_4[label=(R1) await push transfer 8589934592
BB0 {[[0,0,0] - " - "[1,1,1]]}> fontcolor=black shape=ellipse];id_0_0->id_0_4[color=orchid];id_0_5[label=(R1) push transfer 8589934593 to N1
BB0 " - "[[0,0,0] - [1,1,1]]> fontcolor=black shape=ellipse];id_0_1->id_0_5[];id_0_6[label=(R1) push transfer 8589934594 to N2
BB0 " - "[[0,0,0] - [1,1,1]]> fontcolor=black shape=ellipse];id_0_1->id_0_6[];id_0_7[label=(R1) push transfer 8589934595 to N3
BB0 " - "[[0,0,0] - [1,1,1]]> fontcolor=black shape=ellipse];id_0_1->id_0_7[];}"; + "limegreen];id_0_3[label=reduction R1
B0 {[0,0,0] - [1,1,1]}> fontcolor=black " + "shape=ellipse];id_0_1->id_0_3[];id_0_4->id_0_3[];id_0_4[label=(R1) await push transfer 8589934592
BB0 {[0,0,0] - " + "[1,1,1]}> fontcolor=black shape=ellipse];id_0_0->id_0_4[color=orchid];id_0_5[label=(R1) push transfer 8589934593 to N1
BB0 " + "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_5[];id_0_6[label=(R1) push transfer 8589934594 to N2
BB0 " + "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_6[];id_0_7[label=(R1) push transfer 8589934595 to N3
BB0 " + "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_7[];}"; // fully check node 0 const auto dot0 = dctx.print_command_graph(0); @@ -159,11 +159,11 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY SECTION("task graph") { const auto* expected = "digraph G {label=\"Task Graph\" 0[shape=ellipse label=epoch>];1[shape=box style=rounded label=device-compute [0,0,0] - [16,1,1]
read_write B0 {[[0,0,0] - [16,1,1]]}>];0->1[color=orchid];2[shape=ellipse " + "
device-compute [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];0->1[color=orchid];2[shape=ellipse " "label=horizon>];1->2[color=orange];3[shape=box style=rounded label=device-compute " - "[0,0,0] - [16,1,1]
read_write B0 {[[0,0,0] - [16,1,1]]}>];1->3[];4[shape=ellipse " + "[0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];1->3[];4[shape=ellipse " "label=horizon>];3->4[color=orange];2->4[color=orange];5[shape=box style=rounded label=device-compute [0,0,0] - [16,1,1]
read_write B0 {[[0,0,0] - [16,1,1]]}>];3->5[];6[shape=ellipse " + "
device-compute [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];3->5[];6[shape=ellipse " "label=horizon>];5->6[color=orange];4->6[color=orange];7[shape=ellipse label=epoch>];6->7[color=orange];}"; CHECK(runtime_testspy::print_task_graph(celerity::detail::runtime::get_instance()) == expected); @@ -173,14 +173,14 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY const auto* expected = "digraph G{label=\"Command Graph\" subgraph cluster_id_0_0{label=<T0 (epoch)>;color=darkgray;id_0_0[label=epoch> fontcolor=black shape=box];}subgraph cluster_id_0_1{label=<T1 \"full_graph_printing_17\" " - "(device-compute)>;color=darkgray;id_0_1[label=execution [[0,0,0] - [16,1,1]]
read_write B0 {[[0,0,0] - " - "[16,1,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 " + "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - " + "[16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 " "(horizon)>;color=darkgray;id_0_2[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_3{label=<T3 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_3[label=execution [[0,0,0] - " - "[16,1,1]]
read_write B0 {[[0,0,0] - [16,1,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_4{label=<T4 " + "color=\"#606060\">T3 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_3[label=execution [0,0,0] - " + "[16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_4{label=<T4 " "(horizon)>;color=darkgray;id_0_4[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_5{label=<T5 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_5[label=execution [[0,0,0] - " - "[16,1,1]]
read_write B0 {[[0,0,0] - [16,1,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_6{label=<T6 " + "color=\"#606060\">T5 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_5[label=execution [0,0,0] - " + "[16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_6{label=<T6 " "(horizon)>;color=darkgray;id_0_6[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_7{label=<T7 (epoch)>;color=darkgray;id_0_7[label=epoch (barrier)> fontcolor=black " "shape=box];}id_0_0->id_0_1[];id_0_1->id_0_2[color=orange];id_0_1->id_0_3[];id_0_3->id_0_4[color=orange];id_0_2->id_0_4[color=orange];id_0_3->id_0_" diff --git a/test/region_map_tests.cc b/test/region_map_tests.cc index 4943d26f9..57ca83c1a 100644 --- a/test/region_map_tests.cc +++ b/test/region_map_tests.cc @@ -19,12 +19,12 @@ using namespace celerity; using namespace celerity::detail; -template +template using region_map_impl = region_map_detail::region_map_impl; namespace celerity::detail { struct region_map_testspy { - template + template static void traverse(const region_map_impl& rm, const Callback& cb) { auto recurse = [&cb](auto& node, const size_t level, auto& r) -> void { for(size_t i = 0; i < node.m_child_boxes.size(); ++i) { @@ -39,44 +39,44 @@ struct region_map_testspy { recurse(*rm.m_root, 0, recurse); } - template + template static size_t get_num_leaf_nodes(const region_map_impl& rm) { size_t num_leaf_nodes = 0; - traverse(rm, [&num_leaf_nodes]( - const size_t /* level */, const GridBox& /* box */, const std::optional& value, const size_t /* num_children */) { - if(value.has_value()) { num_leaf_nodes++; } - }); + traverse(rm, + [&num_leaf_nodes](const size_t /* level */, const box& /* box */, const std::optional& value, const size_t /* num_children */) { + if(value.has_value()) { num_leaf_nodes++; } + }); return num_leaf_nodes; } - template + template static size_t get_depth(const region_map_impl& rm) { size_t depth = 1; - traverse(rm, [&depth](const size_t level, const GridBox& /* box */, const std::optional& /* value */, - const size_t /* num_children */) { depth = std::max(depth, level + 1); }); + traverse(rm, [&depth](const size_t level, const box& /* box */, const 
std::optional& /* value */, const size_t /* num_children */) { + depth = std::max(depth, level + 1); + }); return depth; } - template + template static double compute_overlap(const region_map_impl& rm) { - std::vector>> boxes_by_level; - traverse( - rm, [&boxes_by_level](const size_t level, const GridBox& box, const std::optional& /* value */, const size_t /* num_children */) { - while(boxes_by_level.size() < level + 1) { - boxes_by_level.push_back({}); - } - boxes_by_level[level].push_back(box); - }); + std::vector>> boxes_by_level; + traverse(rm, [&boxes_by_level](const size_t level, const box& box, const std::optional& /* value */, const size_t /* num_children */) { + while(boxes_by_level.size() < level + 1) { + boxes_by_level.push_back({}); + } + boxes_by_level[level].push_back(box); + }); const size_t num_levels = boxes_by_level.size(); - std::vector> box_union_by_level(num_levels, GridRegion{}); + std::vector> box_union_by_level(num_levels, region{}); size_t total_overlap_area = 0; for(size_t l = 0; l < num_levels; ++l) { size_t overlap = 0; for(auto& b : boxes_by_level[l]) { - overlap += GridRegion::intersect(box_union_by_level[l], b).area(); - box_union_by_level[l] = GridRegion::merge(box_union_by_level[l], b); + overlap += region_intersection(box_union_by_level[l], b).get_area(); + box_union_by_level[l] = region_union(box_union_by_level[l], b); } total_overlap_area += overlap; @@ -88,20 +88,20 @@ struct region_map_testspy { } // We return a percentage value of how much area in the entire rm is overlapping (this may exceed 1) - return static_cast(total_overlap_area) / (rm.m_extent.area() * num_levels); + return static_cast(total_overlap_area) / (rm.m_extent.get_area() * num_levels); } - template - static void erase(region_map_impl& rm, const GridBox& box) { + template + static void erase(region_map_impl& rm, const box& box) { rm.erase(box); } - template - static void insert(region_map_impl& rm, const GridBox& box, const ValueType& value) { + template + 
static void insert(region_map_impl& rm, const box& box, const ValueType& value) { rm.insert(box, value); } - template + template static void try_merge(region_map_impl& rm, std::vector::types::entry> candidates) { rm.try_merge(std::move(candidates)); } @@ -126,7 +126,7 @@ void draw(const region_map_impl& rm) { cairo_select_font_face(cr, "sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); cairo_set_font_size(cr, 10.0); - region_map_testspy::traverse(rm, [&](const size_t level, const GridBox<2>& box, const std::optional& value, const size_t num_children) { + region_map_testspy::traverse(rm, [&](const size_t level, const box<2>& box, const std::optional& value, const size_t num_children) { const auto min = box.get_min(); const auto max = box.get_max(); const float inset = 3.f; @@ -183,7 +183,7 @@ void draw(const region_map_impl& rm) { TEST_CASE("region_map::try_merge does not attempt to merge intermediate results that no longer exist", "[region_map]") { region_map_impl rm({99, 99}, -1); - std::vector, int>> entries = { + std::vector, int>> entries = { // These first three entries will be merged {{{0, 0}, {33, 66}}, 1}, {{{33, 0}, {66, 66}}, 1}, @@ -215,7 +215,7 @@ TEST_CASE("region_map::try_merge does not attempt to merge intermediate results } while(0) TEST_CASE("region_map can be moved", "[region_map]") { - constexpr int64_t size = 128; + constexpr size_t size = 128; const int default_value = -1; region_map_impl rm1{{size}, default_value}; rm1.update_box({0, size}, 1337); @@ -243,9 +243,9 @@ TEST_CASE("region_map handles basic operations in 0D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { - constexpr int64_t size = 128; - const int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + const size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({0, 
size}); @@ -276,22 +276,22 @@ TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { } SECTION("update multiple") { - constexpr int num_parts = 16; - constexpr int slice = size / num_parts; + constexpr size_t num_parts = 16; + constexpr size_t slice = size / num_parts; // Iteratively split line into multiple parts - for(int64_t i = 0; i < num_parts; ++i) { - rm.update_box(GridBox<1>{i * slice, i * slice + slice}, static_cast(i)); + for(size_t i = 0; i < num_parts; ++i) { + rm.update_box(box<1>{i * slice, i * slice + slice}, i); const auto results = rm.get_region_values({0, size}); REQUIRE_LOOP(results.size() == static_cast(i + (i < (num_parts - 1) ? 2 : 1))); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, slice](auto& r) { - return r == std::pair{GridBox<1>{j * slice, j * slice + slice}, static_cast(j)}; + return r == std::pair{box<1>{j * slice, j * slice + slice}, j}; })); } if(i < num_parts - 1) { // Check that original value still exists REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, slice](auto& r) { - return r == std::pair{GridBox<1>{(i + 1) * slice, size}, -1}; + return r == std::pair{box<1>{(i + 1) * slice, size}, std::numeric_limits::max()}; })); } } @@ -299,10 +299,10 @@ TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { - constexpr int64_t height = 128; - constexpr int64_t width = 192; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 128; + constexpr size_t width = 192; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({{0, 0}, {height, width}}); @@ -346,71 +346,70 @@ TEST_CASE("region_map handles basic operations in 2D", 
"[region_map]") { } SECTION("update multiple") { - constexpr int num_rows = 16; - constexpr int row_height = height / num_rows; + constexpr size_t num_rows = 16; + constexpr size_t row_height = height / num_rows; // Iteratively split domain into multiple rows - for(int64_t i = 0; i < num_rows; ++i) { - rm.update_box(GridBox<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, static_cast(i)); + for(size_t i = 0; i < num_rows; ++i) { + rm.update_box(box<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, i); const auto results = rm.get_region_values({{0, 0}, {height, width}}); // Until the last iteration we have to account for the original value. REQUIRE_LOOP(results.size() == static_cast(i + (i < (num_rows - 1) ? 2 : 1))); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height](auto& r) { - return r == std::pair{GridBox<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, static_cast(j)}; + return r == std::pair{box<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, j}; })); } if(i < num_rows - 1) { // Check that original value still exists CHECK(std::any_of(results.begin(), results.end(), [i, row_height, default_value](auto& r) { - return r == std::pair{GridBox<2>{{(i + 1) * row_height, 0}, {height, width}}, default_value}; + return r == std::pair{box<2>{{(i + 1) * row_height, 0}, {height, width}}, default_value}; })); } } // Now drive a center column through all of them - rm.update_box(GridBox<2>{{0, 48}, {height, 80}}, -2); + rm.update_box(box<2>{{0, 48}, {height, 80}}, std::numeric_limits::max() - 2); const auto results = rm.get_region_values({{0, 0}, {height, width}}); - CHECK(std::any_of(results.begin(), results.end(), [](auto& r) { return r == std::pair{GridBox<2>{{0, 48}, {height, 80}}, -2}; })); + CHECK(std::any_of(results.begin(), results.end(), [](auto& r) { + return r == std::pair{box<2>{{0, 48}, {height, 80}}, 
std::numeric_limits::max() - 2}; + })); - for(int64_t i = 0; i < num_rows; ++i) { + for(size_t i = 0; i < num_rows; ++i) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height](auto& r) { - return r == std::pair{GridBox<2>{{i * row_height, 0}, {i * row_height + row_height, 48}}, static_cast(i)}; + return r == std::pair{box<2>{{i * row_height, 0}, {i * row_height + row_height, 48}}, i}; })); REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height](auto& r) { - return r == std::pair{GridBox<2>{{i * row_height, 80}, {i * row_height + row_height, width}}, static_cast(i)}; + return r == std::pair{box<2>{{i * row_height, 80}, {i * row_height + row_height, width}}, i}; })); } } SECTION("update growing from two sides") { - constexpr int num_rows = 16; - constexpr int row_height = height / num_rows; + constexpr size_t num_rows = 16; + constexpr size_t row_height = height / num_rows; // Iteratively split domain into multiple rows, working inwards from two sides - for(int64_t i = 0; i < num_rows / 2; ++i) { - rm.update_box(GridBox<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, static_cast(i)); - rm.update_box( - GridBox<2>{{(num_rows - 1 - i) * row_height, 0}, {(num_rows - 1 - i) * row_height + row_height, width}}, num_rows + static_cast(i)); + for(size_t i = 0; i < num_rows / 2; ++i) { + rm.update_box(box<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, i); + rm.update_box(box<2>{{(num_rows - 1 - i) * row_height, 0}, {(num_rows - 1 - i) * row_height + row_height, width}}, num_rows + i); const auto results = rm.get_region_values({{0, 0}, {height, width}}); // Until the last iteration we have to account for the original value. - REQUIRE_LOOP(results.size() == static_cast(2 * (i + 1) + (i < (num_rows / 2 - 1) ? 1 : 0))); + REQUIRE_LOOP(results.size() == 2 * (i + 1) + (i < (num_rows / 2 - 1) ? 
1 : 0)); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height](auto& r) { - return r == std::pair{GridBox<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, static_cast(j)}; + return r == std::pair{box<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, j}; })); REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height, num_rows](auto& r) { - return r - == std::pair{GridBox<2>{{(num_rows - 1 - j) * row_height, 0}, {(num_rows - 1 - j) * row_height + row_height, width}}, - num_rows + static_cast(j)}; + return r == std::pair{box<2>{{(num_rows - 1 - j) * row_height, 0}, {(num_rows - 1 - j) * row_height + row_height, width}}, num_rows + j}; })); } if(i < num_rows / 2 - 1) { // Check that original value still exists REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height, num_rows, default_value](auto& r) { - return r == std::pair{GridBox<2>{{(i + 1) * row_height, 0}, {(num_rows - 1 - i) * row_height, width}}, default_value}; + return r == std::pair{box<2>{{(i + 1) * row_height, 0}, {(num_rows - 1 - i) * row_height, width}}, default_value}; })); } } @@ -418,15 +417,15 @@ TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { // TODO: Also in 1D/3D? 
SECTION("update boxes random order") { - std::vector, int>> update_boxes; - int x = 100; - constexpr int box_height = height / 16; - constexpr int box_width = width / 16; - for(int64_t i = 0; i < 16; ++i) { - for(int64_t j = 0; j < 16; ++j) { - const GridPoint<2> min = {i * box_height, j * box_width}; - const GridPoint<2> max = min + GridPoint<2>{box_height, box_width}; - update_boxes.push_back(std::pair{GridBox<2>{min, max}, x++}); + std::vector, size_t>> update_boxes; + size_t x = 100; + constexpr size_t box_height = height / 16; + constexpr size_t box_width = width / 16; + for(size_t i = 0; i < 16; ++i) { + for(size_t j = 0; j < 16; ++j) { + const id<2> min = {i * box_height, j * box_width}; + const id<2> max = min + id<2>{box_height, box_width}; + update_boxes.push_back(std::pair{box<2>{min, max}, x++}); } } std::mt19937 g(123); @@ -447,11 +446,11 @@ TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 3D", "[region_map]") { - constexpr int64_t depth = 128; - constexpr int64_t height = 192; - constexpr int64_t width = 256; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 128; + constexpr size_t height = 192; + constexpr size_t width = 256; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({{0, 0, 0}, {depth, height, width}}); @@ -511,30 +510,31 @@ TEST_CASE("region_map handles basic operations in 3D", "[region_map]") { } TEMPLATE_TEST_CASE_SIG("region_map updates get clamped to extent", "[region_map]", ((int Dims), Dims), 1, 2, 3) { - const auto extent = range_cast(range<3>(64, 96, 128)); - const auto full_box = GridBox<3>{{0, 0, 0}, {64, 96, 128}}; - region_map_impl rm{extent, 0}; + const auto extent = test_utils::truncate_range({64, 96, 128}); + const auto full_box = 
test_utils::truncate_box({{0, 0, 0}, {64, 96, 128}}); + region_map_impl rm{extent, 0}; - const auto exceeding_box = region_map_detail::box_cast(GridBox<3>({-32, -16, -8}, {72, 102, 136})); + // TODO boxes based on ids cannot be negative, so we cannot test clamping of the minimum at the moment + const auto exceeding_box = box({}, test_utils::truncate_range({72, 102, 136})); rm.update_box(exceeding_box, 1337); const auto results = rm.get_region_values(exceeding_box); - CHECK_RESULTS(results, {region_map_detail::box_cast(full_box), 1337}); + CHECK_RESULTS(results, {full_box, 1337}); } // This doesn't test anything in paticular, more of a smoke test. TEST_CASE("region_map correctly handles complex queries", "[region_map]") { - region_map_impl rm{{5, 9}, 99999}; + region_map_impl rm{{5, 9}, 99999}; - const std::initializer_list> data = {{{0, 0}, {2, 3}}, {{2, 0}, {5, 2}}, {{2, 2}, {5, 3}}, {{0, 3}, {3, 4}}, {{3, 3}, {4, 4}}, {{4, 3}, {5, 4}}, + const std::initializer_list> data = {{{0, 0}, {2, 3}}, {{2, 0}, {5, 2}}, {{2, 2}, {5, 3}}, {{0, 3}, {3, 4}}, {{3, 3}, {4, 4}}, {{4, 3}, {5, 4}}, {{0, 4}, {1, 9}}, {{1, 4}, {3, 9}}, {{3, 4}, {5, 6}}, {{3, 6}, {5, 7}}, {{3, 7}, {4, 9}}, {{4, 7}, {5, 9}}}; for(size_t i = 0; i < data.size(); ++i) { - rm.update_box(*(data.begin() + i), static_cast(i)); + rm.update_box(*(data.begin() + i), i); } SECTION("query single boxes") { - const auto query_and_check = [&](const GridBox<2>& box, int expected) { + const auto query_and_check = [&](const box<2>& box, size_t expected) { const auto results = rm.get_region_values(box); REQUIRE(results.size() == 1); CHECK(results[0] == std::pair{box, expected}); @@ -552,7 +552,7 @@ TEST_CASE("region_map correctly handles complex queries", "[region_map]") { } SECTION("query overlapping") { - const auto query_and_check = [&](const GridBox<2>& box, const std::vector, int>>& expected) { + const auto query_and_check = [&](const box<2>& box, const std::vector, size_t>>& expected) { const auto results = 
rm.get_region_values(box); CHECK(results.size() == expected.size()); for(const auto& e : expected) { @@ -577,9 +577,9 @@ TEST_CASE("region_map correctly handles complex queries", "[region_map]") { } TEST_CASE("region map merges entries with the same value upon update in 1D", "[region_map]") { - constexpr int64_t size = 128; - constexpr int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; SECTION("simple merge") { rm.update_box({0, 64}, 3); @@ -598,10 +598,10 @@ TEST_CASE("region map merges entries with the same value upon update in 1D", "[r } TEST_CASE("region map merges entries with the same value upon update in 2D", "[region_map]") { - constexpr int64_t height = 64; - constexpr int64_t width = 128; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 64; + constexpr size_t width = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("simple merge") { rm.update_box({{0, 0}, {height, 64}}, 3); @@ -622,13 +622,13 @@ TEST_CASE("region map merges entries with the same value upon update in 2D", "[r SECTION("merge cascade") { // Same as before, but ensure that the tree is several levels deep // Start by filling the tree with "horizontal bars" of decreasing length, preventing any merges between them - for(int64_t i = 0; i < height / 2; ++i) { + for(size_t i = 0; i < height / 2; ++i) { rm.update_box({{i * 2, 0}, {i * 2 + 2, width - 2 - i * 2}}, 3); } CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 2 * (height / 2)); // Every bar creates two entries (old and new value) CHECK(region_map_testspy::get_depth(rm) > 2); // Tree should be several levels deep by now // Now update the values of the vertical bars, skip last one to prevent merge - for(int64_t i = 0; i < (height / 2) - 1; 
++i) { + for(size_t i = 0; i < (height / 2) - 1; ++i) { rm.update_box({{i * 2, width - 2 - i * 2}, {height, width - 2 - i * 2 + 2}}, 3); } CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 2 * (height / 2)); // No merges so far @@ -642,11 +642,11 @@ TEST_CASE("region map merges entries with the same value upon update in 2D", "[r } TEST_CASE("region map merges entries with the same value upon update in 3D", "[region_map]") { - constexpr int64_t depth = 64; - constexpr int64_t height = 96; - constexpr int64_t width = 128; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 64; + constexpr size_t height = 96; + constexpr size_t width = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("simple merge, quasi 1D") { rm.update_box({{0, 0, 0}, {depth, 64, width}}, 3); @@ -668,10 +668,10 @@ TEST_CASE("region map merges entries with the same value upon update in 3D", "[r // NOTE: Merging on query is not required (or possible) in 1D: All merges will be done on update. TEST_CASE("region_map merges truncated result boxes with the same value upon querying in 2D", "[region_map]") { - constexpr int64_t height = 5; - constexpr int64_t width = 9; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 5; + constexpr size_t width = 9; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("simple merge") { // Set up in such a way that values cannot be merged upon update @@ -696,19 +696,17 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que // The exact result is ambiguous depending on how boxes were merged. 
However there should always be 3 CHECK(results.size() == 3); // One is the non-mergeable default-initialized section - CHECK(std::any_of(results.begin(), results.end(), [default_value](auto& r) { - return r == std::pair{GridBox<2>{{3, 3}, {height, width}}, default_value}; - })); + CHECK(std::any_of(results.begin(), results.end(), [default_value](auto& r) { return r == std::pair{box<2>{{3, 3}, {height, width}}, default_value}; })); // The other two are either of these two variants const bool variant_1 = std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 1}, {height, 3}}, 3}; + return r == std::pair{box<2>{{1, 1}, {height, 3}}, size_t(3)}; }) && std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 3}, {3, width}}, 3}; + return r == std::pair{box<2>{{1, 3}, {3, width}}, size_t(3)}; }); const bool variant_2 = std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 1}, {3, width}}, 3}; + return r == std::pair{box<2>{{1, 1}, {3, width}}, size_t(3)}; }) && std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{3, 1}, {height, 3}}, 3}; + return r == std::pair{box<2>{{3, 1}, {height, 3}}, size_t(3)}; }); CHECK(variant_1 != variant_2); } @@ -717,11 +715,11 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que } TEST_CASE("region_map merges truncated result boxes with the same value upon querying in 3D", "[region_map]") { - constexpr int64_t depth = 32; - constexpr int64_t height = 64; - constexpr int64_t width = 96; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 32; + constexpr size_t height = 64; + constexpr size_t width = 96; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("simple merge") { // Setup in such a way that 
values cannot be merged upon update @@ -739,11 +737,11 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que } TEST_CASE("region_map supports apply_to_values", "[region_map]") { - constexpr int64_t size = 128; - constexpr int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; - const auto query_and_check = [&](const GridBox<1>& box, int expected) { + const auto query_and_check = [&](const box<1>& box, size_t expected) { const auto results = rm.get_region_values(box); CHECK(results.size() == 1); CHECK(results[0] == std::pair{box, expected}); @@ -755,7 +753,7 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { rm.update_box({96, size}, 4); SECTION("basic value update") { - rm.apply_to_values([](int v) { return v * v; }); + rm.apply_to_values([](size_t v) { return v * v; }); query_and_check({0, 32}, 1); query_and_check({32, 64}, 4); query_and_check({64, 96}, 9); @@ -764,7 +762,7 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { SECTION("same values are merged after update") { CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 4); - rm.apply_to_values([](int v) { return v != 2 ? 42 : 1337; }); + rm.apply_to_values([](size_t v) -> size_t { return v != 2 ? 
42 : 1337; }); CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 3); query_and_check({0, 32}, 42); query_and_check({32, 64}, 1337); @@ -776,22 +774,22 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { TEST_CASE("inserting consecutive boxes results in zero overlap", "[region_map][performance]") { const bool row_wise_insert = GENERATE(true, false); - const int64_t height = 64; - const int64_t width = 128; - region_map_impl rm{{height, width}, -1}; + const size_t height = 64; + const size_t width = 128; + region_map_impl rm{{height, width}, std::numeric_limits::max()}; - const int64_t count_sqrt = 4; + const size_t count_sqrt = 4; REQUIRE(height % count_sqrt == 0); REQUIRE(width % count_sqrt == 0); - const auto insert_box = [&](const int64_t i, const int64_t j) { - const GridPoint<2> min = {i * (height / count_sqrt), j * (width / count_sqrt)}; - const GridPoint<2> max = min + GridPoint<2>{height / count_sqrt, width / count_sqrt}; + const auto insert_box = [&](const size_t i, const size_t j) { + const id<2> min = {i * (height / count_sqrt), j * (width / count_sqrt)}; + const id<2> max = min + id<2>{height / count_sqrt, width / count_sqrt}; rm.update_box({min, max}, i * count_sqrt + j); }; - for(int64_t i = 0; i < count_sqrt; ++i) { - for(int64_t j = 0; j < count_sqrt; ++j) { + for(size_t i = 0; i < count_sqrt; ++i) { + for(size_t j = 0; j < count_sqrt; ++j) { if(row_wise_insert) { insert_box(i, j); } else { diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index 4c8542d67..97ccd36f0 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -268,10 +268,10 @@ namespace detail { REQUIRE(bam.get_access_modes(buf_b.get_id()).count(cl::sycl::access::mode::discard_read_write) == 1); const auto reqs_a = bam.get_mode_requirements( buf_a.get_id(), cl::sycl::access::mode::read, tsk->get_dimensions(), {tsk->get_global_offset(), tsk->get_global_size()}, tsk->get_global_size()); - REQUIRE(reqs_a == subrange_to_grid_box(subrange<3>({32, 24, 0}, 
{32, 128, 1}))); + REQUIRE(reqs_a == box(subrange<3>({32, 24, 0}, {32, 128, 1}))); const auto reqs_b = bam.get_mode_requirements(buf_b.get_id(), cl::sycl::access::mode::discard_read_write, tsk->get_dimensions(), {tsk->get_global_offset(), tsk->get_global_size()}, tsk->get_global_size()); - REQUIRE(reqs_b == subrange_to_grid_box(subrange<3>({}, {5, 18, 74}))); + REQUIRE(reqs_b == box(subrange<3>({}, {5, 18, 74}))); } TEST_CASE("buffer_access_map merges multiple accesses with the same mode", "[task][device_compute_task]") { @@ -279,13 +279,13 @@ namespace detail { bam.add_access(0, std::make_unique>>(subrange<2>{{3, 0}, {10, 20}}, cl::sycl::access::mode::read, range<2>{30, 30})); bam.add_access(0, std::make_unique>>(subrange<2>{{10, 0}, {7, 20}}, cl::sycl::access::mode::read, range<2>{30, 30})); const auto req = bam.get_mode_requirements(0, cl::sycl::access::mode::read, 2, subrange<3>({0, 0, 0}, {100, 100, 1}), {100, 100, 1}); - REQUIRE(req == subrange_to_grid_box(subrange<3>({3, 0, 0}, {14, 20, 1}))); + REQUIRE(req == box(subrange<3>({3, 0, 0}, {14, 20, 1}))); } TEST_CASE("tasks gracefully handle get_requirements() calls for buffers they don't access", "[task]") { buffer_access_map bam; const auto req = bam.get_mode_requirements(0, cl::sycl::access::mode::read, 3, subrange<3>({0, 0, 0}, {100, 1, 1}), {100, 1, 1}); - REQUIRE(req == subrange_to_grid_box(subrange<3>({0, 0, 0}, {0, 0, 0}))); + REQUIRE(req == box<3>()); } namespace foo { @@ -614,12 +614,12 @@ namespace detail { distr_queue q; const int n = 3; - const auto global_offset = detail::id_cast(id<3>{4, 5, 6}); + const auto global_offset = test_utils::truncate_id({4, 5, 6}); buffer linear_id{{n, Dims + 1}}; q.submit([&](handler& cgh) { accessor a{linear_id, cgh, celerity::access::all{}, write_only, no_init}; // all RM is sane because runtime_tests runs single-node - cgh.parallel_for>(detail::range_cast(range<3>{n, 1, 1}), global_offset, [=](celerity::item item) { + 
cgh.parallel_for>(detail::range_cast(range<1>{n}), global_offset, [=](celerity::item item) { auto i = (item.get_id() - item.get_offset())[0]; for(int d = 0; d < Dims; ++d) { a[i][d] = item[d]; diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc index 6e53b5cd6..788ee42d8 100644 --- a/test/system/distr_tests.cc +++ b/test/system/distr_tests.cc @@ -163,7 +163,7 @@ namespace detail { const auto log = log_capture.get_log(); CHECK_THAT(log, ContainsSubstring("digraph G{label=\"Command Graph\"")); CHECK_THAT(log, ContainsSubstring("(R1) await push")); - CHECK_THAT(log, ContainsSubstring("reduction R1
B0 {[[0,0,0] - [1,1,1]]}")); + CHECK_THAT(log, ContainsSubstring("reduction R1
B0 {[0,0,0] - [1,1,1]}")); } #else SKIP_BECAUSE_NO_SCALAR_REDUCTIONS @@ -205,10 +205,10 @@ namespace detail { // Note: We assume a local range size of 165 here, this may not be supported by all devices. - auto global_range = range_cast(range<3>{n * 4 * 3, 3 * 5, 2 * 11}); - auto local_range = range_cast(range<3>{3, 5, 11}); - auto group_range = global_range / local_range; - auto global_offset = id_cast(id<3>{47, 53, 59}); + const auto global_range = test_utils::truncate_range({n * 4 * 3, 3 * 5, 2 * 11}); + const auto local_range = test_utils::truncate_range({3, 5, 11}); + const auto group_range = global_range / local_range; + const auto global_offset = test_utils::truncate_id({47, 53, 59}); buffer geo(global_range); @@ -217,7 +217,7 @@ namespace detail { cgh.parallel_for>(celerity::nd_range{global_range, local_range}, /* global_offset,*/ [=](nd_item item) { auto group = item.get_group(); g[item.get_global_id()] = geometry{// - {item.get_group_linear_id(), range_cast<3>(item.get_group_range()), range_cast<3>(item.get_local_id()), item.get_local_linear_id(), + {item.get_group_linear_id(), range_cast<3>(item.get_group_range()), id_cast<3>(item.get_local_id()), item.get_local_linear_id(), range_cast<3>(item.get_local_range()), id_cast<3>(item.get_global_id()), item.get_global_linear_id(), range_cast<3>(item.get_global_range())}, {id_cast<3>(group.get_group_id()), group.get_group_linear_id(), range_cast<3>(group.get_group_range()), id_cast<3>(group.get_local_id()), @@ -408,22 +408,22 @@ namespace detail { const std::string expected = "digraph G{label=\"Command Graph\" subgraph cluster_id_0_0{label=<T0 (epoch)>;color=darkgray;id_0_0[label=epoch> fontcolor=black shape=box];}subgraph cluster_id_0_1{label=<T1 \"unnamed_kernel\" " - "(device-compute)>;color=darkgray;id_0_1[label=execution [[0,0,0] - [8,16,1]]
read_write B0 {[[0,0,0] - " - "[8,16,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 " + "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] + [8,16,1]
read_write B0 {[0,0,0] - " + "[8,16,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 " "(horizon)>;color=darkgray;id_0_2[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_3{label=<T3 \"unnamed_kernel\" (device-compute)>;color=darkgray;id_0_3[label=execution [[0,0,0] - " - "[8,16,1]]
read_write B0 {[[0,0,0] - [8,16,1]]}> fontcolor=black shape=box];}subgraph cluster_id_0_4{label=<T3 \"unnamed_kernel\" (device-compute)>;color=darkgray;id_0_3[label=execution [0,0,0] + " + "[8,16,1]
read_write B0 {[0,0,0] - [8,16,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_4{label=<T4 (horizon)>;color=darkgray;id_0_4[label=horizon> fontcolor=black shape=box];}subgraph " "cluster_id_0_5{label=<T5 (epoch)>;color=darkgray;id_0_5[label=epoch (barrier)> " "fontcolor=black " "shape=box];}id_0_0->id_0_1[];id_0_1->id_0_2[color=orange];id_0_1->id_0_3[];id_0_3->id_0_4[color=orange];id_0_2->id_0_4[color=orange];id_0_4->" "id_0_5[color=orange];subgraph cluster_id_1_0{label=<T0 (epoch)>;color=darkgray;id_1_0[label=epoch> fontcolor=crimson shape=box];}subgraph cluster_id_1_1{label=<T1 \"unnamed_kernel\" " - "(device-compute)>;color=darkgray;id_1_1[label=execution [[8,0,0] - [16,16,1]]
read_write B0 {[[8,0,0] " - "- [16,16,1]]}> fontcolor=crimson shape=box];}subgraph cluster_id_1_2{label=<T2 " + "(device-compute)>;color=darkgray;id_1_1[label=execution [8,0,0] + [8,16,1]
read_write B0 {[8,0,0] " + "- [16,16,1]}> fontcolor=crimson shape=box];}subgraph cluster_id_1_2{label=<T2 " "(horizon)>;color=darkgray;id_1_2[label=horizon> fontcolor=crimson shape=box];}subgraph " "cluster_id_1_3{label=<T3 \"unnamed_kernel\" (device-compute)>;color=darkgray;id_1_3[label=execution [[8,0,0] - [16,16,1]]
read_write B0 {[[8,0,0] - [16,16,1]]}> fontcolor=crimson shape=box];}subgraph " + "N1
execution [8,0,0] + [8,16,1]
read_write B0 {[8,0,0] - [16,16,1]}> fontcolor=crimson shape=box];}subgraph " "cluster_id_1_4{label=<T4 (horizon)>;color=darkgray;id_1_4[label=horizon> " "fontcolor=crimson shape=box];}subgraph cluster_id_1_5{label=<T5 (epoch)>;color=darkgray;id_1_5[label=epoch (barrier)> fontcolor=crimson " diff --git a/test/task_graph_tests.cc b/test/task_graph_tests.cc index 1d62d7af0..a7862251f 100644 --- a/test/task_graph_tests.cc +++ b/test/task_graph_tests.cc @@ -391,7 +391,7 @@ namespace detail { CHECK(horizon_tsk->get_dependencies().size() == 3); } - static inline GridRegion<3> make_region(int min, int max) { return GridRegion<3>(GridPoint<3>(min, 0, 0), GridPoint<3>(max, 1, 1)); } + static inline region<3> make_region(size_t min, size_t max) { return box<3>({min, 0, 0}, {max, 1, 1}); } TEST_CASE("task horizons update previous writer data structure", "[task_manager][task-graph][task-horizon]") { auto tt = test_utils::task_test_context{}; diff --git a/test/test_utils.h b/test/test_utils.h index 906e979ea..fa8f5df6f 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -1,7 +1,5 @@ #pragma once -#include "catch2/benchmark/catch_clock.hpp" -#include "catch2/benchmark/catch_optimizer.hpp" #include #include #include @@ -15,6 +13,7 @@ #include #endif +#include // for keep_memory() #include #include @@ -367,36 +366,74 @@ namespace test_utils { Catch::Benchmark::keep_memory(&v); } + // truncate_*(): unchecked versions of *_cast() with signatures friendly to parameter type inference + + template + range truncate_range(const range<3>& r3) { + static_assert(Dims <= 3); + range r = detail::zeros; + for(int d = 0; d < Dims; ++d) { + r[d] = r3[d]; + } + return r; + } + + template + id truncate_id(const id<3>& i3) { + static_assert(Dims <= 3); + id i; + for(int d = 0; d < Dims; ++d) { + i[d] = i3[d]; + } + return i; + } + + template + subrange truncate_subrange(const subrange<3>& sr3) { + return subrange(truncate_id(sr3.offset), truncate_range(sr3.range)); + } + + template + subrange 
truncate_chunk(const chunk<3>& ck3) { + return chunk(truncate_id(ck3.offset), truncate_range(ck3.range), truncate_range(ck3.global_size)); + } + + template + detail::box truncate_box(const detail::box<3>& b3) { + return detail::box(truncate_id(b3.get_min()), truncate_id(b3.get_max())); + } + } // namespace test_utils } // namespace celerity namespace Catch { -template -struct StringMaker> { - static std::string convert(const celerity::id& value) { - switch(Dims) { - case 1: return fmt::format("{{{}}}", value[0]); - case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); - case 3: return fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return "{}"; - } +template +struct StringMaker> { + static std::string convert(const std::pair& v) { + return fmt::format("({}, {})", Catch::Detail::stringify(v.first), Catch::Detail::stringify(v.second)); } }; -template -struct StringMaker> { - static std::string convert(const celerity::range& value) { - switch(Dims) { - case 1: return fmt::format("{{{}}}", value[0]); - case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); - case 3: return fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return "{}"; - } - } +template +struct StringMaker> { + static std::string convert(const std::optional& v) { return v.has_value() ? 
Catch::Detail::stringify(*v) : "null"; } }; +#define CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(Type) \ + template \ + struct StringMaker> { \ + static std::string convert(const Type& v) { return fmt::format("{}", v); } \ + }; + +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::id) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::range) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::subrange) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::chunk) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::detail::box) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::detail::region) + template <> struct StringMaker { static std::string convert(const sycl::device& d) { From e2560bb0413dbf43e7863561aab63a8c3bdfb765 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sun, 6 Aug 2023 18:39:44 +0200 Subject: [PATCH 05/17] Remove inclusion of old grid implementation --- include/grid.h | 61 +++------------------------ test/grid_tests.cc | 103 --------------------------------------------- 2 files changed, 7 insertions(+), 157 deletions(-) diff --git a/include/grid.h b/include/grid.h index 90130c5ee..8abb185c9 100644 --- a/include/grid.h +++ b/include/grid.h @@ -1,69 +1,22 @@ #pragma once -#include "workaround.h" #include #include #include - -#include -#include #include -#undef assert_fail // Incompatible with fmt #include "ranges.h" +#include "workaround.h" -namespace celerity { -namespace detail { - - using namespace allscale::api::user::data; - - inline GridPoint<1> id_to_grid_point(id<1> id) { return GridPoint<1>(id[0]); } - - inline GridPoint<2> id_to_grid_point(id<2> id) { return GridPoint<2>(id[0], id[1]); } - - inline GridPoint<3> id_to_grid_point(id<3> id) { return GridPoint<3>(id[0], id[1], id[2]); } - - // The AllScale classes use a different template type for dimensions (size_t), which can lead to some type inference issues. 
- // We thus have to provide all instantiations explicitly as overloads below. - namespace impl { - - template - GridBox subrange_to_grid_box(const subrange& sr) { - return GridBox(id_to_grid_point(sr.offset), id_to_grid_point(sr.offset + sr.range)); - } - - template - subrange grid_box_to_subrange(const GridBox& box) { - const auto& box_min = box.get_min(); - const auto& box_max = box.get_max(); - id min; - id max; - for(int i = 0; i < Dims; ++i) { - min[i] = box_min[i]; - max[i] = box_max[i]; - } - return subrange{min, range_cast(max - min)}; - } - - } // namespace impl - - - inline GridBox<1> subrange_to_grid_box(const subrange<1>& sr) { return impl::subrange_to_grid_box<1>(sr); } - inline GridBox<2> subrange_to_grid_box(const subrange<2>& sr) { return impl::subrange_to_grid_box<2>(sr); } - inline GridBox<3> subrange_to_grid_box(const subrange<3>& sr) { return impl::subrange_to_grid_box<3>(sr); } - - inline subrange<1> grid_box_to_subrange(const GridBox<1>& box) { return impl::grid_box_to_subrange<1>(box); } - inline subrange<2> grid_box_to_subrange(const GridBox<2>& box) { return impl::grid_box_to_subrange<2>(box); } - inline subrange<3> grid_box_to_subrange(const GridBox<3>& box) { return impl::grid_box_to_subrange<3>(box); } +namespace celerity::detail { - template - class box; +template +class box; - template - class region; +template +class region; -} // namespace detail -} // namespace celerity +} // namespace celerity::detail namespace celerity::detail::grid_detail { diff --git a/test/grid_tests.cc b/test/grid_tests.cc index 00f6749a7..f1bb150fd 100644 --- a/test/grid_tests.cc +++ b/test/grid_tests.cc @@ -450,57 +450,6 @@ TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[grid]" } } -template -GridBox to_legacy_box(const box& b) { - GridPoint min; - GridPoint max; - for(int d = 0; d < Dims; ++d) { - min[d] = b.get_min()[d]; - max[d] = b.get_max()[d]; - } - return GridBox(min, max); -} - -template -std::vector> to_legacy_boxes(const 
std::vector>& boxes) { - std::vector> legacy_boxes(boxes.size()); - std::transform(boxes.begin(), boxes.end(), legacy_boxes.begin(), to_legacy_box); - return legacy_boxes; -} - -template -GridRegion legacy_union(const std::vector>& boxes) { - auto it = boxes.begin(); - GridRegion r(*it++); - while(it != boxes.end()) { - r = GridRegion::merge(r, *it++); - } - return r; -} - -TEMPLATE_TEST_CASE_SIG("legacy: computing the union of a fully mergeable tiling of boxes", "[legacy-grid]", ((int Dims), Dims), 1, 2, 3) { - const auto [label, n] = GENERATE(values>({ - {"small", 4}, - {"medium", 50}, - {"large", 1000}, - })); - - const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); - - const auto boxes_nd = create_box_tiling(n_per_side); - const auto legacy_boxes_nd = to_legacy_boxes(boxes_nd); - - // TODO not entirely fair, we could do a tree-merge for comparison - BENCHMARK(fmt::format("{}, native", label)) { return legacy_union(legacy_boxes_nd); }; - - if constexpr(Dims < 3) { - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); - const auto legacy_boxes_3d = to_legacy_boxes(boxes_3d); - - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return legacy_union(legacy_boxes_3d); }; - } -} - // TODO: benchmark small box sets - we want low constant overhead for the common case TEST_CASE("region union - 2d", "[grid]") { @@ -706,43 +655,6 @@ TEST_CASE("performing set operations between randomized regions - 3d", "[grid]") test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); } -TEST_CASE("legacy: performing set operations between randomized regions - 2d", "[legacy-grid]") { - const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - {"medium", 50, 1, 50}, - {"large", 200, 20, 100}, - })); - - const std::vector inputs_2d{ - region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; - const std::vector 
inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; - - const std::vector legacy_inputs_2d{legacy_union(to_legacy_boxes(inputs_2d[0].get_boxes())), legacy_union(to_legacy_boxes(inputs_2d[1].get_boxes()))}; - const std::vector legacy_inputs_3d{legacy_union(to_legacy_boxes(inputs_3d[0].get_boxes())), legacy_union(to_legacy_boxes(inputs_3d[1].get_boxes()))}; - - BENCHMARK(fmt::format("union, {}, native", label)) { return GridRegion<2>::merge(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; - BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return GridRegion<3>::merge(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; - BENCHMARK(fmt::format("intersection, {}, native", label)) { return GridRegion<2>::intersect(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; - BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return GridRegion<3>::intersect(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; - BENCHMARK(fmt::format("difference, {}, native", label)) { return GridRegion<2>::difference(legacy_inputs_2d[0], legacy_inputs_2d[1]); }; - BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return GridRegion<3>::difference(legacy_inputs_3d[0], legacy_inputs_3d[1]); }; -} - -TEST_CASE("legacy: performing set operations between randomized regions - 3d", "[legacy-grid]") { - const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - //{"medium", 50, 1, 50}, - //{"large", 200, 20, 100}, - })); - - const std::vector inputs_3d{legacy_union(to_legacy_boxes(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13))), - legacy_union(to_legacy_boxes(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37)))}; - - BENCHMARK(fmt::format("union, {}, native", label)) { return GridRegion<3>::merge(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("intersection, {}, native", label)) { return GridRegion<3>::intersect(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("difference, {}, 
native", label)) { return GridRegion<3>::difference(inputs_3d[0], inputs_3d[1]); }; -} - std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { std::vector> boxes; for(size_t i = 0; i < num_boxes_per_side; ++i) { @@ -766,18 +678,3 @@ TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[grid] render_boxes(boxes_2d, fmt::format("{}-input", label)); } - -TEST_CASE("legacy: normalizing a fully mergeable, complex tiling of boxes - 2d", "[legacy-grid]") { - const auto [label, n] = GENERATE(values>({ - {"small", 10}, - {"large", 200}, - })); - - const auto boxes_2d = create_interlocking_boxes(n); - const auto legacy_boxes_2d = to_legacy_boxes(boxes_2d); - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); - const auto legacy_boxes_3d = to_legacy_boxes(boxes_3d); - - BENCHMARK(fmt::format("{}, native", label)) { return legacy_union(legacy_boxes_2d); }; - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return legacy_union(legacy_boxes_3d); }; -} \ No newline at end of file From e3dc450f9ae7a6abd5adacd8cd3840de64a016a0 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sun, 6 Aug 2023 18:47:05 +0200 Subject: [PATCH 06/17] Remove Allscale dependency --- CMakeLists.txt | 4 - include/buffer.h | 1 - vendor/allscale/VERSION | 7 - vendor/allscale/api/core/data.h | 287 -- vendor/allscale/api/core/impl/reference/io.h | 1109 ------ .../allscale/api/core/impl/reference/lock.h | 242 -- .../api/core/impl/reference/profiling.h | 411 -- .../allscale/api/core/impl/reference/queue.h | 380 -- .../core/impl/reference/runtime_predictor.h | 231 -- .../api/core/impl/reference/task_id.h | 257 -- .../api/core/impl/reference/treeture.h | 3031 --------------- .../api/core/impl/sequential/treeture.h | 335 -- vendor/allscale/api/core/io.h | 575 --- vendor/allscale/api/core/prec.h | 486 --- vendor/allscale/api/core/treeture.h | 430 -- vendor/allscale/api/user/algorithm/async.h | 80 - .../algorithm/internal/operation_reference.h | 114 - 
vendor/allscale/api/user/algorithm/pfor.h | 1758 --------- vendor/allscale/api/user/algorithm/preduce.h | 205 - vendor/allscale/api/user/algorithm/stencil.h | 1474 ------- vendor/allscale/api/user/algorithm/vcycle.h | 272 -- vendor/allscale/api/user/arithmetic.h | 45 - vendor/allscale/api/user/data/adaptive_grid.h | 577 --- vendor/allscale/api/user/data/grid.h | 623 --- vendor/allscale/api/user/data/map.h | 335 -- vendor/allscale/api/user/data/mesh.h | 3444 ----------------- vendor/allscale/api/user/data/scalar.h | 216 -- vendor/allscale/api/user/data/static_grid.h | 342 -- vendor/allscale/api/user/save_to_binary.h | 124 - vendor/allscale/utils/array_utils.h | 37 - vendor/allscale/utils/assert.h | 132 - vendor/allscale/utils/bag.h | 117 - vendor/allscale/utils/bitmanipulation.h | 52 - vendor/allscale/utils/concepts.h | 43 - vendor/allscale/utils/functional_utils.h | 143 - vendor/allscale/utils/io_utils.h | 39 - vendor/allscale/utils/large_array.h | 609 --- vendor/allscale/utils/printer/arrays.h | 15 - vendor/allscale/utils/printer/join.h | 79 - vendor/allscale/utils/printer/pairs.h | 13 - vendor/allscale/utils/printer/set.h | 15 - vendor/allscale/utils/printer/vectors.h | 15 - vendor/allscale/utils/range.h | 80 - vendor/allscale/utils/raw_buffer.h | 53 - vendor/allscale/utils/serializer.h | 500 --- vendor/allscale/utils/serializer/arrays.h | 60 - vendor/allscale/utils/serializer/strings.h | 34 - vendor/allscale/utils/serializer/vectors.h | 52 - vendor/allscale/utils/static_grid.h | 247 -- vendor/allscale/utils/static_map.h | 120 - vendor/allscale/utils/string_utils.h | 12 - vendor/allscale/utils/table.h | 243 -- vendor/allscale/utils/tuple_utils.h | 111 - vendor/allscale/utils/type_list.h | 66 - vendor/allscale/utils/unused.h | 12 - vendor/allscale/utils/vector.h | 415 -- vendor/allscale/utils/vector_utils.h | 78 - 57 files changed, 20787 deletions(-) delete mode 100644 vendor/allscale/VERSION delete mode 100644 vendor/allscale/api/core/data.h delete mode 100644 
vendor/allscale/api/core/impl/reference/io.h delete mode 100644 vendor/allscale/api/core/impl/reference/lock.h delete mode 100644 vendor/allscale/api/core/impl/reference/profiling.h delete mode 100644 vendor/allscale/api/core/impl/reference/queue.h delete mode 100644 vendor/allscale/api/core/impl/reference/runtime_predictor.h delete mode 100644 vendor/allscale/api/core/impl/reference/task_id.h delete mode 100644 vendor/allscale/api/core/impl/reference/treeture.h delete mode 100644 vendor/allscale/api/core/impl/sequential/treeture.h delete mode 100644 vendor/allscale/api/core/io.h delete mode 100644 vendor/allscale/api/core/prec.h delete mode 100644 vendor/allscale/api/core/treeture.h delete mode 100644 vendor/allscale/api/user/algorithm/async.h delete mode 100644 vendor/allscale/api/user/algorithm/internal/operation_reference.h delete mode 100644 vendor/allscale/api/user/algorithm/pfor.h delete mode 100644 vendor/allscale/api/user/algorithm/preduce.h delete mode 100644 vendor/allscale/api/user/algorithm/stencil.h delete mode 100644 vendor/allscale/api/user/algorithm/vcycle.h delete mode 100644 vendor/allscale/api/user/arithmetic.h delete mode 100644 vendor/allscale/api/user/data/adaptive_grid.h delete mode 100644 vendor/allscale/api/user/data/grid.h delete mode 100644 vendor/allscale/api/user/data/map.h delete mode 100644 vendor/allscale/api/user/data/mesh.h delete mode 100644 vendor/allscale/api/user/data/scalar.h delete mode 100644 vendor/allscale/api/user/data/static_grid.h delete mode 100644 vendor/allscale/api/user/save_to_binary.h delete mode 100644 vendor/allscale/utils/array_utils.h delete mode 100644 vendor/allscale/utils/assert.h delete mode 100644 vendor/allscale/utils/bag.h delete mode 100644 vendor/allscale/utils/bitmanipulation.h delete mode 100644 vendor/allscale/utils/concepts.h delete mode 100644 vendor/allscale/utils/functional_utils.h delete mode 100644 vendor/allscale/utils/io_utils.h delete mode 100644 vendor/allscale/utils/large_array.h delete 
mode 100644 vendor/allscale/utils/printer/arrays.h delete mode 100644 vendor/allscale/utils/printer/join.h delete mode 100644 vendor/allscale/utils/printer/pairs.h delete mode 100644 vendor/allscale/utils/printer/set.h delete mode 100644 vendor/allscale/utils/printer/vectors.h delete mode 100644 vendor/allscale/utils/range.h delete mode 100644 vendor/allscale/utils/raw_buffer.h delete mode 100644 vendor/allscale/utils/serializer.h delete mode 100644 vendor/allscale/utils/serializer/arrays.h delete mode 100644 vendor/allscale/utils/serializer/strings.h delete mode 100644 vendor/allscale/utils/serializer/vectors.h delete mode 100644 vendor/allscale/utils/static_grid.h delete mode 100644 vendor/allscale/utils/static_map.h delete mode 100644 vendor/allscale/utils/string_utils.h delete mode 100644 vendor/allscale/utils/table.h delete mode 100644 vendor/allscale/utils/tuple_utils.h delete mode 100644 vendor/allscale/utils/type_list.h delete mode 100644 vendor/allscale/utils/unused.h delete mode 100644 vendor/allscale/utils/vector.h delete mode 100644 vendor/allscale/utils/vector_utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 45dc4525d..82e9e76d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -347,10 +347,6 @@ install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ DESTINATION include/celerity ) -install( - DIRECTORY ${PROJECT_SOURCE_DIR}/vendor/allscale/ - DESTINATION include/celerity/vendor/allscale -) install( FILES ${PROJECT_SOURCE_DIR}/vendor/ctpl_stl.h DESTINATION include/celerity/vendor diff --git a/include/buffer.h b/include/buffer.h index 38f933a40..ef5024daa 100644 --- a/include/buffer.h +++ b/include/buffer.h @@ -3,7 +3,6 @@ #include #include -#include #include "buffer_manager.h" #include "lifetime_extending_state.h" diff --git a/vendor/allscale/VERSION b/vendor/allscale/VERSION deleted file mode 100644 index 1374b2b8a..000000000 --- a/vendor/allscale/VERSION +++ /dev/null @@ -1,7 +0,0 @@ -AllScale API @ d058bb3f2c7782900fce9e5efdf093a16df56d6a 
-https://github.com/allscale/allscale_api - -Contains small changes in files - api/core/impl/reference/treeture.h - utils/functional_utils.h -to make it compile with MSVC2015. diff --git a/vendor/allscale/api/core/data.h b/vendor/allscale/api/core/data.h deleted file mode 100644 index c2636c877..000000000 --- a/vendor/allscale/api/core/data.h +++ /dev/null @@ -1,287 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/concepts.h" -#include "allscale/utils/serializer.h" - -namespace allscale { -namespace api { -namespace core { - - namespace sema { - - // c++ versions of the data item element access helper functions, facilitating compiler analysis - template - T& _data_item_element_access(DataItem&, const typename DataItem::region_type&, T& ref) { - return ref; - } - - template - const T& _data_item_element_access(const DataItem&, const typename DataItem::region_type&, const T& ref) { - return ref; - } - - /** - * A user-defined read requirement on a region of a data item. - */ - template - void needs_read_access(const DataItem& item, const typename DataItem::region_type& region) { - int a = 0; a = _data_item_element_access(item,region,a); - }; - - /** - * A user-defined write requirement on a region of a data item. - */ - template - void needs_write_access(const DataItem& item, const typename DataItem::region_type& region) { - int a = 0; _data_item_element_access(item,region,a) = 0; - }; - - /** - * Instruct compiler to ignore dependencies in the enclosing scope. - */ - inline void no_more_dependencies() {}; - - } - - // a macro to wrap up data_item_element_access calls, - // eliminating the overhead of creating a region instance on every access - // the ternary operation enforces type checks even on reference compilations - #ifndef ALLSCALECC - #define data_item_element_access(DataItem,Region,Res) \ - ((false) ? 
allscale::api::core::sema::_data_item_element_access(DataItem,Region,Res) : Res) - #else - #define data_item_element_access(DataItem,Region,Res) allscale::api::core::sema::_data_item_element_access(DataItem,Region,Res) - #endif - - // --------------------------------------------------------------------------------- - // Regions - // --------------------------------------------------------------------------------- - - - template - struct is_region : public std::false_type {}; - - template - struct is_region::value && - - // regions have to be serializable - utils::is_serializable::value && - - // there has to be an emptiness check - std::is_same::value && - - // there has to be an union operation - std::is_same::value && - - // there has to be an intersection operation - std::is_same::value && - - // there has to be a set difference operation - std::is_same::value && - - // there has to be a span operator, computing the hull of two regions - std::is_same::value, - - void>::type> : public std::true_type {}; - - - - - // --------------------------------------------------------------------------------- - // Fragments - // --------------------------------------------------------------------------------- - - - - template - struct is_fragment : public std::false_type {}; - - template - struct is_fragment::value && - - // fragments need to be constructible for a given region - std::is_same(), std::declval())), F>::value && - - // fragments need to be destructible - std::is_destructible::value && - - // the region covered by the fragment has to be obtainable - std::is_same::value && - - // there has to be a resize operator - std::is_same::value && - - // there is an insert operator importing data from an existing fragment - std::is_same::value && - - // there is a extract operator extracting a region of data from the present fragment - std::is_same::value && - - // there is a insert operator, importing previously extracted data into this fragment - std::is_same::value && - 
- // can be concerted into a facade - std::is_same::value, - - void>::type> : public std::true_type{}; - - - - - - // --------------------------------------------------------------------------------- - // SharedData - // --------------------------------------------------------------------------------- - - - template - struct is_shared_data : public std::false_type {}; - - template - struct is_shared_data::value && - - // regions have to be serializable - utils::is_serializable::value, - - void>::type> : public std::true_type {}; - - - // --------------------------------------------------------------------------------- - // Facade - // --------------------------------------------------------------------------------- - - - template - struct is_facade : public std::false_type {}; - - template - struct is_facade::value && - - // nor copy-assignable - !std::is_copy_assignable::value && - - // fragments need to be destructible - std::is_destructible::value, - - void>::type> : public std::true_type {}; - - - // --------------------------------------------------------------------------------- - // Data Items - // --------------------------------------------------------------------------------- - - - template - struct is_data_item : public std::false_type {}; - - template - struct is_data_item::value && - is_facade::value && - is_fragment::value && - is_shared_data::value, - void>::type> : public std::true_type {}; - - - template< - typename Fragment - > - struct data_item { - - // make sure the region type is satisfying the concept - static_assert(is_region::value, "Region type must fit region concept!"); - static_assert(is_fragment::value, "Fragment type must fit fragment concept!"); - static_assert(is_shared_data::value, "Shared data type must fit shared data concept!"); - - using fragment_type = Fragment; - using region_type = typename Fragment::region_type; - using facade_type = typename Fragment::facade_type; - using shared_data_type = typename 
Fragment::shared_data_type; - - // define default init/copy/move support - - data_item() = default; - data_item(data_item&&) = default; - data_item(const data_item&) = delete; - - data_item& operator=(const data_item&) = delete; - data_item& operator=(data_item&&) = default; - }; - - - // --------------------------------------------------------------------------------- - // Utilities - // --------------------------------------------------------------------------------- - - - /** - * A generic utility to compute whether a region a is covering a sub-set of a region b. - */ - template - typename std::enable_if::value,bool>::type - isSubRegion(const R& a, const R& b) { - return R::difference(a,b).empty(); - } - - /** - * A convenience wrapper for computing the span (e.g. convex hull) between two data regions. - */ - template - typename std::enable_if::value,R>::type - span(const R& a, const R& b) { - return R::span(a,b); - } - - /** - * A convince wrapper for merging a number of regions (single element base-case). - */ - template - typename std::enable_if::value,R>::type - merge(const R& a) { - return a; - } - - /** - * A convince wrapper for merging a number of regions (multiple element step-case). - */ - template - typename std::enable_if::value,R>::type - merge(const R& a, const Rs& ... rest) { - return R::merge(a,merge(rest...)); - } - - /** - * A default implementation of shared data for data items that do not need shared any shared data. 
- */ - struct no_shared_data { - - void store(utils::ArchiveWriter&) const { - // nothing to do - } - - static no_shared_data load(utils::ArchiveReader&) { - return no_shared_data(); - } - - }; - - // make sure the no_shared_data is a shared data instance - static_assert(is_shared_data::value, "no_shared_data type does not fulfill shared data concept!"); - -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/io.h b/vendor/allscale/api/core/impl/reference/io.h deleted file mode 100644 index d4e12e061..000000000 --- a/vendor/allscale/api/core/impl/reference/io.h +++ /dev/null @@ -1,1109 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#ifdef _MSC_VER - // includes - #include - // marcos for function identifiers - #define CLOSE_WRAPPER _close - #define LSEEK_WRAPPER _lseek - #define OPEN_WRAPPER _open - #define READ_WRAPPER _read - #define WRITE_WRAPPER _write - // macros for flags - #define S_IRUSR _S_IREAD - #define S_IWUSR _S_IWRITE -#else - // includes - #include - #include - // marcos for function identifiers - #define CLOSE_WRAPPER close - #define LSEEK_WRAPPER lseek - #define OPEN_WRAPPER open - #define READ_WRAPPER read - #define WRITE_WRAPPER write -#endif - -#include -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/serializer.h" - - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * Supported IO modes. - */ - enum class Mode { - Text, Binary - }; - - /** - * The kind of handle to reference entities within an IO manager. - */ - struct Entry { - std::size_t id; - bool operator<(const Entry& other) const { return id < other.id; } - }; - - /** - * A common base class for Input and Output Streams. 
- */ - class IOStream { - protected: - - Entry entry; - - std::mutex operation_lock; - - IOStream(const Entry& entry) : entry(entry) {} - - IOStream(IOStream&& other) - : entry(other.entry) {} - - public: - - Entry getEntry() const { - return entry; - } - - }; - - /** - * A stream to load data in the form of a stream of entries. - */ - class InputStream : public IOStream { - - template - friend class IOManager; - - public: - struct IStreamWrapper { - std::istream& in; - IStreamWrapper(std::istream& in) : in(in) {} - template - IStreamWrapper& operator>>(T& value) { - in >> value; - return *this; - } - template - T read() { - T value; - in.read((char*)&value, sizeof(T)); - return value; - } - template - IStreamWrapper& read(T& res) { - in.read((char*)&res, sizeof(T)); - return *this; - } - }; - - private: - IStreamWrapper in; - - InputStream(const Entry& entry, std::istream& in) - : IOStream(entry), in(in) {} - - public: - - InputStream(InputStream&& other) - : IOStream(std::move(other)), in(other.in) {} - - template - void atomic(const Body& body) { - // protect output by locking it - std::lock_guard lease(operation_lock); - - // let the body read it's information - body(in); - - // free the lock - automatically - } - - template - void operator>>(T& value) { - atomic([&](IStreamWrapper& in) { in >> value; }); - } - - template - T read() { - T res; - atomic([&](IStreamWrapper& in) { - res = in.read(); - }); - return res; - } - - operator bool() const { - return (bool)in.in; - } - - static InputStream& load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - /** - * A stream to store data in the form of a stream of entries. 
- */ - class OutputStream : public IOStream { - - template - friend class IOManager; - - public: - struct OStreamWrapper { - std::ostream& out; - OStreamWrapper(std::ostream& out) : out(out) {} - template - OStreamWrapper& operator<<(const T& value) { - out << value; - return *this; - } - OStreamWrapper& operator<<(const char* value) { - out << value; - return *this; - } - template - OStreamWrapper& write(const T& value) { - out.write((char*)&value, sizeof(T)); - return *this; - } - }; - - private: - OStreamWrapper out; - - OutputStream(const Entry& entry, std::ostream& out) - : IOStream(entry), out(out) {} - - public: - - OutputStream(OutputStream&& other) - : IOStream(std::move(other)), out(other.out) {} - - template - void atomic(const Body& body) { - // protect output by locking it - std::lock_guard lease(operation_lock); - - // let the body write it's information - body(out); - - // free the lock - automatically - } - - template - void operator<<(const T& value) { - atomic([&](OStreamWrapper& out) { - out << value; - }); - } - void operator<<(const char* value) { - atomic([&](OStreamWrapper& out) { - out << value; - }); - } - - template - void write(const T& value) { - atomic([&](OStreamWrapper& out) { - out.write(value); - }); - } - - operator bool() const { - return (bool)out.out; - } - - static OutputStream& load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - - - - class MemoryMappedIO { - - Entry entry; - - void* base; - - public: - - MemoryMappedIO(const Entry& entry, void* base) - : entry(entry), base(base) {} - - Entry getEntry() const { - return entry; - } - - protected: - - void* getBase() const { - return base; - } - - }; - - class MemoryMappedInput : public MemoryMappedIO { - - template - friend class IOManager; - - MemoryMappedInput(const Entry& entry, void* base) - : MemoryMappedIO(entry,base) {} - - public: - - 
template - const T& access() const { - return *static_cast(getBase()); - } - - // -- make it serializable -- - - static MemoryMappedInput load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - class MemoryMappedOutput : public MemoryMappedIO { - - template - friend class IOManager; - - MemoryMappedOutput(const Entry& entry, void* base) - : MemoryMappedIO(entry,base) {} - - public: - - template - T& access() const { - return *static_cast(getBase()); - } - - // -- make it serializable -- - - static MemoryMappedOutput load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - /** - * An IO manager, as the central dispatcher for IO operations. - */ - template - class IOManager { - - /** - * The underlying store. - */ - StorageManager store; - - /** - * The central register of all open output streams. - */ - std::map inputStreams; - - /** - * The central register of all open output streams. - */ - std::map outputStreams; - - /** - * The central register of all open memory mapped inputs. - */ - std::map memoryMappedInputs; - - /** - * The central register of all open memory mapped outputs. - */ - std::map memoryMappedOutputs; - - public: - - ~IOManager() { - // close and destroy all input streams - for(auto& cur : inputStreams) { - closeStream(cur.second); - } - // close and destroy all output streams - for(auto& cur : outputStreams) { - closeStream(cur.second); - } - // close and destroy all memory mapped inputs - for(auto& cur : memoryMappedInputs) { - closeMemoryMappedIO(cur.second); - } - // close and destroy all memory mapped outputs - for(auto& cur : memoryMappedOutputs) { - closeMemoryMappedIO(cur.second); - } - } - - /** - * Creates a new entry with the given name in the underlying storage system. 
- * - * @param name the name of the entry (e.g. file) - * @param mode whether it is a binary or text file - * @return a entry ID referencing the newly created resource - */ - Entry createEntry(const std::string& name, Mode mode = Mode::Text) { - return store.createEntry(name, mode); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - InputStream& openInputStream(Entry entry) { - - // check for present - auto pos = inputStreams.find(entry); - if (pos != inputStreams.end()) return pos->second; - - // create new input stream - InputStream res(entry, *store.createInputStream(entry)); - - // register stream - inputStreams.emplace(entry, std::move(res)); - - // return result - return getInputStream(entry); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - OutputStream& openOutputStream(Entry entry) { - - // check for present - auto pos = outputStreams.find(entry); - if (pos != outputStreams.end()) return pos->second; - - // create new input stream - OutputStream res(entry, *store.createOutputStream(entry)); - - // register stream - outputStreams.emplace(entry, std::move(res)); - - // return result - return getOutputStream(entry); - } - - /** - * Register a new memory mapped input with the given name within the system. - * The call will load the underlying storage and prepare input operations. - * - * NOTE: this method is not thread safe! 
- * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedInput openMemoryMappedInput(Entry entry) { - - // check for present - auto pos = memoryMappedInputs.find(entry); - if (pos != memoryMappedInputs.end()) return pos->second; - - // create new input stream - MemoryMappedInput res(entry, store.createMemoryMappedInput(entry)); - - // register stream - memoryMappedInputs.emplace(entry, std::move(res)); - - // return result - return getMemoryMappedInput(entry); - } - - /** - * Register a new memory mapped output with the given name within the system. - * The call will create the underlying storage and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedOutput openMemoryMappedOutput(Entry entry, std::size_t size) { - - // check for present - auto pos = memoryMappedOutputs.find(entry); - if (pos != memoryMappedOutputs.end()) return pos->second; - - // create new input stream - MemoryMappedOutput res(entry, store.createMemoryMappedOutput(entry,size)); - - // register stream - memoryMappedOutputs.emplace(entry, std::move(res)); - - // return result - return getMemoryMappedOutput(entry); - } - - - /** - * Obtains an input stream to read data from a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - InputStream& getInputStream(Entry entry) { - assert_true(inputStreams.find(entry) != inputStreams.end()); - return inputStreams.find(entry)->second; - } - - /** - * Obtains an output stream to write data to a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. 
- * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - OutputStream& getOutputStream(Entry entry) { - assert_true(outputStreams.find(entry) != outputStreams.end()); - return outputStreams.find(entry)->second; - } - - /** - * Obtains a memory mapped input to read data from a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * input is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped input - */ - MemoryMappedInput getMemoryMappedInput(Entry entry) { - assert_true(memoryMappedInputs.find(entry) != memoryMappedInputs.end()); - return memoryMappedInputs.find(entry)->second; - } - - /** - * Obtains a memory mapped output to write data to a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * output is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped output - */ - MemoryMappedOutput getMemoryMappedOutput(Entry entry) { - assert_true(memoryMappedOutputs.find(entry) != memoryMappedOutputs.end()); - return memoryMappedOutputs.find(entry)->second; - } - - /** - * Closes the stream with the given name. - */ - void closeInputStream(Entry entry) { - // get the stream - auto pos = inputStreams.find(entry); - if (pos == inputStreams.end()) return; - - // close the stream - closeStream(pos->second); - - // erase the entry - inputStreams.erase(pos); - } - - /** - * Closes the stream with the given name. - */ - void closeOutputStream(Entry entry) { - // get the stream - auto pos = outputStreams.find(entry); - if (pos == outputStreams.end()) return; - - // close the stream - closeStream(pos->second); - - // erase the entry - outputStreams.erase(pos); - } - - /** - * Closes the given stream. 
- */ - void close(const InputStream& in) { - closeInputStream(in.getEntry()); - } - - /** - * Closes the given stream. - */ - void close(const OutputStream& out) { - closeOutputStream(out.getEntry()); - } - - /** - * Closes the given memory mapped input. - */ - void close(const MemoryMappedInput& in) { - auto pos = memoryMappedInputs.find(in.getEntry()); - if (pos == memoryMappedInputs.end()) return; - - // remove memory mapping - closeMemoryMappedIO(in); - - // erase entry from register - memoryMappedInputs.erase(pos); - } - - /** - * Closes the given memory mapped output. - */ - void close(const MemoryMappedOutput& out) { - auto pos = memoryMappedOutputs.find(out.getEntry()); - if (pos == memoryMappedOutputs.end()) return; - - // remove memory mapping - closeMemoryMappedIO(out); - - // erase entry from register - memoryMappedOutputs.erase(pos); - } - - /** - * Determines whether the given entry exists. - */ - bool exists(Entry entry) const { - return store.exists(entry); - } - - /** - * Deletes the entry with the given name. - */ - void remove(Entry entry) { - store.remove(entry); - } - - private: - - /** - * Closes the given input stream. - */ - void closeStream(InputStream& in) { - // closes the stream - store.close(in.in.in); - } - - /** - * Closes the given output stream. - */ - void closeStream(OutputStream& out) { - // closes the stream - store.close(out.out.out); - } - - /** - * Close the given memory mapped IO connection. - */ - void closeMemoryMappedIO(const MemoryMappedInput& input) { - // closes the memory mapped input - store.close(input); - } - - /** - * Close the given memory mapped IO connection. 
- */ - void closeMemoryMappedIO(const MemoryMappedOutput& output) { - // closes the memory mapped output - store.close(output); - } - - }; - - - - // ---------------------------------------------------------------------- - // for in-memory buffer operations - // ---------------------------------------------------------------------- - - - struct BufferStorageFactory { - - struct Buffer { - std::string name; - Mode mode; - std::stringstream* stream; - }; - - struct MemoryMappedBuffer { - std::size_t size; - void* base; - }; - - std::size_t counter = 0; - - std::map buffers; - - std::map memoryMappedBuffers; - - ~BufferStorageFactory() { - for(const auto& cur : buffers) delete cur.second.stream; - for(const auto& cur : memoryMappedBuffers) free(cur.second.base); - } - - Entry createEntry(const std::string& name, Mode mode) { - // check for present entry - for(const auto& cur : buffers) { - if (cur.second.name == name) { - return cur.first; - } - } - - // create a new entry - Entry id{counter++}; - Buffer& entry = buffers[id]; - entry.name = name; - entry.mode = mode; - entry.stream = nullptr; - return id; - } - - std::istream* createInputStream(Entry entry) { - - // search for entry - auto pos = buffers.find(entry); - if (pos == buffers.end()) { - assert_fail() << "Unable to create input stream to unknown entity!"; - return nullptr; - } - - - // reuse current stream content - std::stringstream* old = pos->second.stream; - std::stringstream* res = (pos->second.mode == Mode::Binary) ? - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::in | std::ios_base::binary ) : - new std::stringstream((old) ? 
old->str() : std::basic_string(), std::ios_base::in ); - delete old; - pos->second.stream = res; - return res; - } - - std::ostream* createOutputStream(Entry entry) { - - // search for entry - auto pos = buffers.find(entry); - if (pos == buffers.end()) { - assert_fail() << "Unable to create output stream to unknown entity!"; - return nullptr; - } - - // reuse current stream content - std::stringstream* old = pos->second.stream; - std::stringstream* res = (pos->second.mode == Mode::Binary) ? - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::out | std::ios_base::binary ) : - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::out ); - delete old; - pos->second.stream = res; - return res; - } - - void* createMemoryMappedInput(const Entry& entry) { - // the target buffer needs to be present - auto pos = memoryMappedBuffers.find(entry); - if (pos == memoryMappedBuffers.end()) return nullptr; - return pos->second.base; - } - - void* createMemoryMappedOutput(const Entry& entry, std::size_t size) { - // check whether there is already such a buffer - auto pos = memoryMappedBuffers.find(entry); - if (pos != memoryMappedBuffers.end()) { - // use existing - assert_eq(size,pos->second.size) << "Cannot change size of buffer during re-opening!"; - return pos->second.base; - } - - // create a new buffer - auto& buffer = memoryMappedBuffers[entry]; - buffer.size = size; - buffer.base = std::malloc(size); - return buffer.base; - } - - void close(const MemoryMappedIO&) { - // nothing to do - } - - void close(std::istream&) { - // nothing to do - } - - void close(std::ostream&) { - // nothing to do - } - - bool exists(Entry entry) const { - return buffers.find(entry) != buffers.end(); - } - - void remove(Entry entry) { - auto pos = buffers.find(entry); - if (pos == buffers.end()) return; - delete pos->second.stream; - buffers.erase(pos); - } - }; - - class BufferIOManager : public IOManager { - - }; - - - // 
---------------------------------------------------------------------- - // for file IO - // ---------------------------------------------------------------------- - - struct FileStorageFactory { - - using file_descriptor = int; - - struct File { - // general - std::string name; - Mode mode; - - // for memory-mapped files - file_descriptor fd; - std::size_t size; - void* base; - - File(const std::string& name, Mode mode) - : name(name), mode(mode), fd(0), size(0), base(nullptr) {} - - }; - - std::vector files; - - Entry createEntry(const std::string& name, Mode mode) { - // check for present entry - for(std::size_t i=0; i < files.size(); ++i) { - if (files[i].name == name) return Entry{i}; - } - - // create a new entry - Entry id{files.size()}; - files.push_back(File(name,mode)); - return id; - } - - std::istream* createInputStream(Entry entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to create input stream to unknown entity!"; - return nullptr; - } - - // create a matching file stream - const File& file = files[entry.id]; - return (file.mode == Mode::Binary) ? - new std::fstream(file.name,std::ios_base::in | std::ios_base::binary) : - new std::fstream(file.name,std::ios_base::in); - } - - std::ostream* createOutputStream(Entry entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to create output stream to unknown entity!"; - return nullptr; - } - - // create a matching file stream - const File& file = files[entry.id]; - return (file.mode == Mode::Binary) ? 
- new std::fstream(file.name,std::ios_base::out | std::ios_base::binary) : - new std::fstream(file.name,std::ios_base::out); - } - - void* createMemoryMappedInput(const Entry& entry) { - - // get a reference to the covered file - File& file = getFile(entry); - - // check that file is not already mapped - assert_true(file.base==nullptr) - << "Error: file already previously opened!"; - - // get the file descriptor - file.fd = getFileDescriptor(file,true); - - // resolve the file size - file.size = getFileSize(file); - -#ifndef _MSC_VER - // map file into address space - file.base = mmap(nullptr,file.size, PROT_READ, MAP_PRIVATE, file.fd, 0); - // check result of mmap - if (!checkMappedAddress(file.base)) file.base = nullptr; -#else - // if no support for memory mapped io, try to read the entire file into a buffer - file.base = malloc(file.size); - auto bytesRead = READ_WRAPPER(file.fd, file.base, (unsigned)file.size); - if (bytesRead < 0) { - free(file.base); - file.base = nullptr; - } -#endif - - // return pointer to base address - return file.base; - } - - void* createMemoryMappedOutput(const Entry& entry, std::size_t size) { - - // get a reference to the covered file - File& file = getFile(entry); - - // check that file is not already mapped - assert_true(file.base==nullptr) - << "Error: file already previously opened!"; - - // get the file descriptor - file.fd = createFile(file,size); - - // fix the file size - file.size = size; - -#ifndef _MSC_VER - // map file into address space - file.base = mmap(nullptr,file.size, PROT_READ | PROT_WRITE, MAP_SHARED, file.fd, 0); - // check result of mmap - if (!checkMappedAddress(file.base)) file.base = nullptr; -#else - file.base = malloc(size); -#endif - - // return pointer to base address - return file.base; - } - - void close(std::istream& stream) { - delete &stream; - } - - void close(std::ostream& stream) { - delete &stream; - } - - void close(const MemoryMappedInput& mmi) { - close(mmi, false); - } - - void close(const 
MemoryMappedOutput& mmo) { - close(mmo, true); - } - - bool exists(Entry entry) const { - if (entry.id >= files.size()) return false; - struct stat buffer; - return stat(files[entry.id].name.c_str(), &buffer) == 0; - } - - void remove(Entry entry) { - if (entry.id >= files.size()) return; - std::remove(files[entry.id].name.c_str()); - } - - private: - - File& getFile(const Entry& entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unknown file entry: " << entry.id; - return files[0]; - } - - // provide access - return files[entry.id]; - } - - static file_descriptor createFile(const File& file, std::size_t size) { - - // create the new file - auto fd = OPEN_WRAPPER(file.name.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR ); - assert_ne(-1,fd) << "Error creating file " << file.name; - - // fix size of file - LSEEK_WRAPPER(fd,(long)(size-1),SEEK_SET); - - // write a byte at the end - char data = 0; - auto res = WRITE_WRAPPER(fd,&data,1); - assert_eq(1,res) << "Could not write byte at end of file."; - if (res != 1) return 0; - - // move cursor back to start - LSEEK_WRAPPER(fd,0,SEEK_SET); - - // return file descriptor - return fd; - } - - static file_descriptor getFileDescriptor(const File& file, bool readOnly) { - - // get the register entry - if (file.fd > 0) return file.fd; - - // get name of file - const char* name = file.name.c_str(); - - // get file descriptor from file name - auto fd = OPEN_WRAPPER(name, ((readOnly) ? 
O_RDONLY : O_RDWR ) ); - assert_ne(-1,fd) << "Error opening file " << name; - - // return the obtained file descriptor - return fd; - - } - - static std::size_t getFileSize(const File& file) { - - // get size of file - struct stat fileStat; - auto succ = stat(file.name.c_str(),&fileStat); - assert_eq(0,succ) << "Unable to obtain size of input file: " << file.name; - - if (succ != 0) return 0; - - // get the file size - return fileStat.st_size; - } - - static bool checkMappedAddress(void* addr) { -#ifndef _MSC_VER - // compare with error token - if (addr != MAP_FAILED) return true; - char buffer[2000]; - std::cout << strerror_r(errno,buffer,2000); -#endif - // fail with message if mapping failed - // or if mapped address checking was requested on MSVC platforms - assert_fail() << "Failed to map file into address space!"; - return false; - } - - void close(const MemoryMappedIO& mmio, bool requiresWrite) { - - auto entry = mmio.getEntry(); - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to close memory mapped input to unknown entity!"; - return; - } - - // get the register entry - File& file = files[entry.id]; - if (!file.base) return; - - int succ = 0; -#ifndef _MSC_VER - // unmap the file from the address space - succ = munmap(file.base, file.size); - assert_eq(0, succ) - << "Unable to unmap file " << file.name; - // if it was not successful, stop it here - if (succ != 0) return; - // silence unused parameter warning - (void)requiresWrite; -#else - // if no support for memory mapped io, just write full buffer contents to file and free buffer - if (requiresWrite) { - auto bytesWritten = WRITE_WRAPPER(file.fd, file.base, (unsigned)file.size); - free(file.base); - assert_le(0, bytesWritten) - << "Unable to write to file " << file.name << ", " << strerror(errno) << " " << file.fd; - } -#endif - - // close the file descriptor - succ = ::CLOSE_WRAPPER(file.fd); - assert_eq(0, succ) << "Unable to close file " << file.name; - - // 
reset the file descriptor - file.fd = 0; - - // reset the base pointer - file.base = nullptr; - - } - - }; - - class FileIOManager : public IOManager { - FileIOManager() {}; - public: - static FileIOManager& getInstance() { - static FileIOManager manager; - return manager; - } - }; - - - /** - * Obtains access to the singleton instance of the File IO manager. - */ - inline static FileIOManager& getFileIOManager() { - return FileIOManager::getInstance(); - } - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/lock.h b/vendor/allscale/api/core/impl/reference/lock.h deleted file mode 100644 index aba34c237..000000000 --- a/vendor/allscale/api/core/impl/reference/lock.h +++ /dev/null @@ -1,242 +0,0 @@ -#pragma once - -#include -#include - -#if defined _MSC_VER -// required for YieldProcessor macro -#define NOMINMAX -#include "windows.h" -//#elif defined (__ppc64__) || defined (_ARCH_PPC64) - -#endif - -namespace allscale { -namespace api { -namespace core { -inline namespace simple { - - /* Pause instruction to prevent excess processor bus usage */ - -#ifdef _MSC_VER -#define cpu_relax() YieldProcessor() -#elif defined (__ppc64__) || defined (_ARCH_PPC64) -#define __barrier() __asm__ volatile("": : :"memory") -#define __HMT_low() __asm__ volatile("or 1,1,1 # low priority") -#define __HMT_medium() __asm__ volatile("or 2,2,2 # medium priority") -#define cpu_relax() do { __HMT_low(); __HMT_medium(); __barrier(); } while (0) -#else -#define cpu_relax() __builtin_ia32_pause() -#endif - - class Waiter { - int i; - public: - Waiter() : i(0) {} - - void operator()() { - ++i; - if ((i % 1000) == 0) { - // there was no progress => let others work - std::this_thread::yield(); - } else { - // relax this CPU - cpu_relax(); - } - } - }; - - - - class SpinLock { - std::atomic lck; - public: - - SpinLock() : lck(0) { - } - - void lock() { - Waiter 
wait; - while(!try_lock()) wait(); - } - - bool try_lock() { - int should = 0; - return lck.compare_exchange_weak(should, 1, std::memory_order_acquire); - } - - void unlock() { - lck.store(0, std::memory_order_release); - } - }; - - /** - * An optimistic read/write lock. - */ - class OptimisticReadWriteLock { - - /** - * The type utilized for the version numbering. - */ - using version_t = std::size_t; - - /** - * The version number. - * - even: there is no write in progress - * - odd: there is a write in progress, do not allow read operations - */ - std::atomic version; - - public: - - /** - * The lease utilized to link start and end of read phases. - */ - class Lease { - friend class OptimisticReadWriteLock; - version_t version; - public: - Lease(version_t version = 0) : version(version) {} - Lease(const Lease& lease) = default; - Lease& operator=(const Lease& other) = default; - Lease& operator=(Lease&& other) = default; - }; - - OptimisticReadWriteLock() : version(0) {} - - /** - * Starts a read phase, making sure that there is currently no - * active concurrent modification going on. The resulting lease - * enables the invoking process to later-on verify that no - * concurrent modifications took place. - */ - Lease start_read() { - Waiter wait; - - // get a snapshot of the lease version - auto v = version.load(std::memory_order_acquire); - - // spin while there is a write in progress - while((v & 0x1) == 1) { - // wait for a moment - wait(); - // get an updated version - v = version.load(std::memory_order_acquire); - } - - // done - return Lease(v); - } - - /** - * Tests whether there have been concurrent modifications since - * the given lease has been issued. 
- * - * @return true if no updates have been conducted, false otherwise - */ - bool validate(const Lease& lease) { - // check whether version number has changed in the mean-while - return lease.version == version.load(std::memory_order_consume); - } - - /** - * Ends a read phase by validating the given lease. - * - * @return true if no updates have been conducted since the - * issuing of the lease, false otherwise - */ - bool end_read(const Lease& lease) { - // check lease in the end - return validate(lease); - } - - /** - * Starts a write phase on this lock be ensuring exclusive access - * and invalidating any existing read lease. - */ - void start_write() { - Waiter wait; - - // set last bit => make it odd - auto v = version.fetch_or(0x1, std::memory_order_acquire); - - // check for concurrent writes - while((v & 0x1) == 1) { - // wait for a moment - wait(); - // get an updated version - v = version.fetch_or(0x1, std::memory_order_acquire); - } - - // done - } - - /** - * Tries to start a write phase unless there is a currently ongoing - * write operation. In this case no write permission will be obtained. - * - * @return true if write permission has been granted, false otherwise. - */ - bool try_start_write() { - auto v = version.fetch_or(0x1, std::memory_order_acquire); - return !(v & 0x1); - } - - /** - * Updates a read-lease to a write permission by a) validating that the - * given lease is still valid and b) making sure that there is no currently - * ongoing write operation. - * - * @return true if the lease was still valid and write permissions could - * be granted, false otherwise. 
- */ - bool try_upgrade_to_write(const Lease& lease) { - auto v = version.fetch_or(0x1, std::memory_order_acquire); - - // check whether write privileges have been gained - if (v & 0x1) return false;// there is another writer already - - // check whether there was no write since the gain of the read lock - if (lease.version == v) return true; - - // if there was, undo write update - abort_write(); - - // operation failed - return false; - } - - /** - * Aborts a write operation by reverting to the version number before - * starting the ongoing write, thereby re-validating existing leases. - */ - void abort_write() { - // reset version number - version.fetch_sub(1,std::memory_order_release); - } - - /** - * Ends a write operation by giving up the associated exclusive access - * to the protected data and abandoning the provided write permission. - */ - void end_write() { - // update version number another time - version.fetch_add(1,std::memory_order_release); - } - - /** - * Tests whether currently write permissions have been granted to any - * client by this lock. - * - * @return true if so, false otherwise - */ - bool is_write_locked() const { - return version & 0x1; - } - - }; - -} // end namespace simple -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/profiling.h b/vendor/allscale/api/core/impl/reference/profiling.h deleted file mode 100644 index cffd9f535..000000000 --- a/vendor/allscale/api/core/impl/reference/profiling.h +++ /dev/null @@ -1,411 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "allscale/api/core/impl/reference/task_id.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * A log entry within the performance log. - */ - class ProfileLogEntry { - - public: - - /** - * Codes enumerating possible events. 
- */ - enum Kind { - // worker events - WorkerCreated, // < the first event to be logged - WorkerSuspended, // < a worker thread is suspended - WorkerResumed, // < a worker thread is resumed - WorkerDestroyed, // < a worker thread is terminated - - // task events - TaskStolen, // < a task got stolen - TaskSplit, // < a task got split - TaskStarted, // < a task processing got started - TaskEnded, // < a task processing finished - - // control events - EndOfStream, // < the last event, to mark the end of a stream - }; - - private: - - uint64_t time; - - Kind kind; - - TaskID task; - - ProfileLogEntry(uint64_t time, Kind kind) - : time(time), kind(kind), task() {} - - ProfileLogEntry(uint64_t time, Kind kind, TaskID task) - : time(time), kind(kind), task(task) {} - - public: - - ProfileLogEntry() =default; - - // -- observers -- - - uint64_t getTimestamp() const { - return time; - } - - Kind getKind() const { - return kind; - } - - TaskID getTask() const { - return task; - } - - // -- factories -- - - static ProfileLogEntry createWorkerCreatedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerCreated); - } - - static ProfileLogEntry createWorkerDestroyedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerDestroyed); - } - - static ProfileLogEntry createWorkerSuspendedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerSuspended); - } - - static ProfileLogEntry createWorkerResumedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerResumed); - } - - static ProfileLogEntry createTaskStolenEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskStolen, task); - } - - static ProfileLogEntry createTaskStartedEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskStarted, task); - } - - static ProfileLogEntry createTaskEndedEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskEnded, task); - } - - // -- utility functions -- - - bool operator<(const ProfileLogEntry& other) { - // sort 
events by time - return time < other.time; - } - - friend std::ostream& operator<<(std::ostream& out, const ProfileLogEntry& entry) { - - out << "@" << entry.time << ":"; - - switch(entry.kind) { - // worker events - case WorkerCreated: return out << "Worker created"; - case WorkerSuspended: return out << "Worker suspended"; - case WorkerResumed: return out << "Worker resumed"; - case WorkerDestroyed: return out << "Worker destroyed"; - - // task events - case TaskStolen: return out << "Task " << entry.task << " stolen"; - case TaskSplit: return out << "Task " << entry.task << " split"; - case TaskStarted: return out << "Task " << entry.task << " started"; - case TaskEnded: return out << "Task " << entry.task << " ended"; - - // everything else - default: return out << "Unknown event!"; - } - } - - private: - - /** - * A utility to retrieve a timestamp for events. - */ - static uint64_t getCurrentTime() { - static thread_local uint64_t last = 0; - - // get current time - uint64_t cur = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch() - ).count(); - - // make sure time is progressing - if (cur > last) { - last = cur; - return cur; - } - - // increase by at least one time step - return last+1; - } - - }; - - - - class ProfileLog { - - public: - - // the block size of the log - enum { BATCH_SIZE = 100000 }; - - private: - - using block_t = std::array; - using block_list_t = std::list; - - using block_const_iter = block_list_t::const_iterator; - using block_iter = block_list_t::iterator; - - using entry_const_iter = block_t::const_iterator; - using entry_iter = block_t::iterator; - - // the log entries, organized in blocks of N entries - block_list_t data; - - entry_iter next; - entry_iter endOfBlock; - - public: - - ProfileLog() : next(nullptr), endOfBlock(nullptr) {} - - void addEntry(const ProfileLogEntry& entry) { - // create a new block if necessary - if (next == endOfBlock) { - data.emplace_back(); - next = 
data.back().begin(); - endOfBlock = data.back().end(); - } - - // insert entry - *next = entry; - ++next; - } - - ProfileLog& operator<<(const ProfileLogEntry& entry) { - addEntry(entry); - return *this; - } - - - // -- log entry iteration -- - - class iterator : public std::iterator { - - block_const_iter b_cur; - block_const_iter b_end; - - entry_const_iter e_cur; - entry_const_iter e_end; - - entry_const_iter log_end; - - public: - - static iterator begin(const block_list_t& blocks, const entry_const_iter& log_end) { - iterator res; - res.b_cur = blocks.begin(); - res.b_end = blocks.end(); - if (res.isEnd()) return res; - res.e_cur = res.b_cur->begin(); - res.e_end = res.b_cur->end(); - res.log_end = log_end; - return res; - } - - static iterator end(const block_list_t& blocks) { - iterator res; - res.b_cur = blocks.end(); - res.b_end = blocks.end(); - return res; - } - - bool operator==(const iterator& other) const { - return isEnd() && other.isEnd(); - } - - bool operator!=(const iterator& other) const { - return !(*this == other); - } - - const ProfileLogEntry& operator*() const { - return *e_cur; - } - - iterator& operator++() { - // go to next entry - ++e_cur; - - // if it is the end of the log => jump to end of iterator range - if (e_cur == log_end) { - b_cur = b_end; - return *this; - } - - // if not end of current block is reached, continue - if (e_cur != e_end) return *this; - - // go to next block - b_cur++; - - // if there is none, mark as done - if (b_cur == b_end) return *this; - - // walk into next block - e_cur = b_cur->begin(); - e_end = b_cur->end(); - return *this; - } - - private: - - bool isEnd() const { - return b_cur == b_end; - } - - }; - - iterator begin() const { - return iterator::begin(data,next); - } - - iterator end() const { - return iterator::end(data); - } - - - void saveTo(std::ostream& out) { - // save the number of blocks - std::size_t num_blocks = data.size(); - out.write((char*)&num_blocks,sizeof(num_blocks)); - - // save the 
offset of the last block - std::size_t offset = 0; - if (num_blocks > 0) { - offset = next - data.back().begin(); - } - out.write((char*)&offset,sizeof(offset)); - - // save all blocks - for(const auto& cur : data) { - out.write((char*)&cur,sizeof(block_t)); - } - } - - void saveTo(const std::string& file) { - std::fstream trg(file.c_str(), std::ios::out | std::ios::binary); - saveTo(trg); - } - - static ProfileLog loadFrom(std::istream& in) { - // load the number of blocks - std::size_t num_blocks; - in.read((char*)&num_blocks,sizeof(num_blocks)); - - // load the offset for the last block - std::size_t offset; - in.read((char*)&offset,sizeof(offset)); - - ProfileLog log; - for(std::size_t i = 0; i 0) { - log.next = log.data.back().begin() + offset; - } - - // done - return log; - } - - static ProfileLog loadFrom(const std::string& file) { - std::fstream src(file.c_str(), std::ios::in | std::ios::binary); - return loadFrom(src); - } - - }; - - inline std::string getLogFileNameForWorker(int id) { - // create the filename - char filename[17]; - assert_lt(id, 10000) << "Unexpectedly larger number of workers"; - snprintf(filename, 17, "profile_log.%04d", ((unsigned)id)%10000); - return filename; - } - - static inline int& getCurrentWorkerID() { - static thread_local int workerID; - return workerID; - } - - static inline void setCurrentWorkerID(int id) { - getCurrentWorkerID() = id; - } - - namespace detail { - - struct ProfileLogHandler { - ProfileLog log; - - ~ProfileLogHandler() { - // save log to the chosen filename - log.saveTo(getLogFileNameForWorker(getCurrentWorkerID())); - } - }; - - inline ProfileLog& getProfileLog() { - static thread_local ProfileLogHandler logHandler; - return logHandler.log; - } - - inline void logProfilerEventInternal(const ProfileLogEntry& entry) { - getProfileLog() << entry; - } - - } - - - #ifdef ENABLE_PROFILING - - const bool PROFILING_ENABLED = true; - - #define logProfilerEvent(EVENT) \ - 
allscale::api::core::impl::reference::detail::logProfilerEventInternal(EVENT) - - #else - - const bool PROFILING_ENABLED = false; - - #define logProfilerEvent(EVENT) /* ignore */ - - #endif - - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/queue.h b/vendor/allscale/api/core/impl/reference/queue.h deleted file mode 100644 index c20bf72a0..000000000 --- a/vendor/allscale/api/core/impl/reference/queue.h +++ /dev/null @@ -1,380 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "allscale/utils/printer/arrays.h" -#include "allscale/api/core/impl/reference/lock.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - - template - class BoundQueue { - - public: - - static const size_t capacity = Capacity; - - private: - - using guard = std::lock_guard; - - static const size_t buffer_size = capacity + 1; - - mutable SpinLock lock; - - std::array data; - - size_t front; - size_t back; - - public: - - BoundQueue() : lock(), front(0), back(0) { - for(auto& cur : data) cur = T(); - } - - bool empty() const { - return front == back; - } - bool full() const { - return ((back + 1) % buffer_size) == front; - } - - bool push_front(const T& t) { - guard g(lock); - if (full()) { - return false; - } - front = (front - 1 + buffer_size) % buffer_size; - data[front] = t; - return true; - } - - bool push_back(const T& t) { - guard g(lock); - if (full()) { - return false; - } - data[back] = t; - back = (back + 1) % buffer_size; - return true; - } - - private: - - T pop_front_internal() { - if (empty()) { - return T(); - } - T res(std::move(data[front])); - front = (front + 1) % buffer_size; - return res; - } - - T pop_back_internal() { - if (empty()) { - return T(); - } - back = (back - 1 + buffer_size) % buffer_size; - T res(std::move(data[back])); - return res; - } 
- - public: - - T pop_front() { - guard g(lock); - return pop_front_internal(); - } - - T try_pop_front() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_front_internal(); - lock.unlock(); - return res; - } - - T pop_back() { - guard g(lock); - return pop_back_internal(); - } - - T try_pop_back() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_back_internal(); - lock.unlock(); - return res; - } - - size_t size() const { - guard g(lock); - return (back >= front) ? (back - front) : (buffer_size - (front - back)); - } - - std::vector getSnapshot() const { - std::vector res; - guard g(lock); - size_t i = front; - while(i != back) { - res.push_back(data[i]); - i += (i + 1) % buffer_size; - } - return res; - } - - friend std::ostream& operator<<(std::ostream& out, const BoundQueue& queue) { - guard g(queue.lock); - return out << "[" << queue.data << "," << queue.front << " - " << queue.back << "]"; - } - - }; - - - - template - class UnboundQueue { - - using guard = std::lock_guard; - - mutable SpinLock lock; - - std::list data; - - std::atomic num_entries; - - public: - - UnboundQueue() : lock(), num_entries(0) {} - - void push_front(const T& t) { - guard g(lock); - data.push_front(t); - ++num_entries; - } - - void push_back(const T& t) { - guard g(lock); - data.push_back(t); - ++num_entries; - } - - private: - - T pop_front_internal() { - if (data.empty()) { - return T(); - } - T res(std::move(data.front())); - data.pop_front(); - --num_entries; - return res; - } - - T pop_back_internal() { - if (data.empty()) { - return T(); - } - T res(std::move(data.back())); - data.pop_back(); - --num_entries; - return res; - } - - public: - - T pop_front() { - guard g(lock); - return pop_front_internal(); - } - - T try_pop_front() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_front_internal(); - lock.unlock(); - return res; - } - - T pop_back() { - guard g(lock); - return pop_back_internal(); - } - - T try_pop_back() { - if 
(!lock.try_lock()) { - return {}; - } - const T& res = pop_back_internal(); - lock.unlock(); - return res; - } - - bool empty() const { - return num_entries == 0; - } - - size_t size() const { - return num_entries; - } - - std::vector getSnapshot() const { - guard g(lock); - return std::vector(data.begin(),data.end()); - } - - }; - - - template - class OptimisticUnboundQueue { - - mutable OptimisticReadWriteLock lock; - - std::list data; - - std::atomic num_entries; - - public: - - OptimisticUnboundQueue() : lock(), num_entries(0) {} - - void push_front(const T& t) { - lock.start_write(); - data.push_front(t); - ++num_entries; - lock.end_write(); - } - - void push_back(const T& t) { - lock.start_write(); - data.push_back(t); - ++num_entries; - lock.end_write(); - } - - private: - - template - T pop_front_internal() { - // manual tail-recursion optimization since - // debug builds may fail to do so - while(true) { - - // start with a read permit - auto lease = lock.start_read(); - - // check whether it is empty - if (data.empty()) { - return T(); - } - - // to retrieve data, upgrade to a write - if (!lock.try_upgrade_to_write(lease)) { - // if upgrade failed, restart procedure if requested - if (tryOnlyOnce) return T(); - continue; // start over again - } - - // now this one has write access (exclusive) - T res(std::move(data.front())); - data.pop_front(); - --num_entries; - - // write is complete - lock.end_write(); - - // done - return res; - - } - } - - template - T pop_back_internal() { - // manual tail-recursion optimization since - // debug builds may fail to do so - while(true) { - - // start with a read permit - auto lease = lock.start_read(); - - // check whether it is empty - if (data.empty()) { - return T(); - } - - // to retrieve data, upgrade to a write - if (!lock.try_upgrade_to_write(lease)) { - // if upgrade failed, restart procedure if requested - if (tryOnlyOnce) return T(); - continue; // start over again - } - - // now this one has write access 
(exclusive) - T res(std::move(data.back())); - data.pop_back(); - --num_entries; - - // write is complete - lock.end_write(); - - // done - return res; - } - } - - public: - - T pop_front() { - return pop_front_internal(); - } - - T try_pop_front() { - return pop_front_internal(); - } - - T pop_back() { - return pop_back_internal(); - } - - T try_pop_back() { - return pop_back_internal(); - } - - bool empty() const { - return num_entries == 0; - } - - size_t size() const { - return num_entries; - } - - std::vector getSnapshot() const { - lock.start_write(); - std::vector res(data.begin(),data.end()); - lock.end_write(); - return res; - } - - }; - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/runtime_predictor.h b/vendor/allscale/api/core/impl/reference/runtime_predictor.h deleted file mode 100644 index a4e785066..000000000 --- a/vendor/allscale/api/core/impl/reference/runtime_predictor.h +++ /dev/null @@ -1,231 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#if defined _MSC_VER -#include -#elif defined (__ppc64__) || defined (_ARCH_PPC64) || defined(__powerpc__) || defined(__ppc__) -static __inline__ unsigned long long __rdtsc(void) -{ - int64_t tb; - asm("mfspr %0, 268" : "=r"(tb)); - return tb; -} -#else -#include -#endif - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * A type to represent a type safe cycle count. 
- */ - class CycleCount { - - using time_t = unsigned long long; - - time_t value; - - public: - - CycleCount() {} - - CycleCount(time_t value) : value(value) {} - - bool operator==(const CycleCount& other) const { - return value == other.value; - } - - bool operator!=(const CycleCount& other) const { - return value != other.value; - } - - bool operator<(const CycleCount& other) const { - return value < other.value; - } - - bool operator>(const CycleCount& other) const { - return value > other.value; - } - - CycleCount operator+(const CycleCount& other) const { - return value + other.value; - } - - CycleCount operator-(const CycleCount& other) const { - return value - other.value; - } - - time_t count() const { - return value; - } - - static CycleCount zero() { - return 0; - } - - static CycleCount max() { - return std::numeric_limits::max(); - } - - }; - - inline CycleCount operator*(long unsigned int f, const CycleCount& count) { - return f * count.count(); - } - - inline CycleCount operator*(const CycleCount& count, long unsigned int f) { - return count.count() * f; - } - - inline CycleCount operator/(const CycleCount& count, long unsigned int div) { - return count.count() / div; - } - - /** - * A cycle clock for the time prediction. - */ - struct CycleClock { - - using time_point = CycleCount; - using duration = CycleCount; - - static time_point now() { - return __rdtsc(); - } - - }; - - - /** - * A utility to estimate the execution time of tasks on different - * levels of task-decomposition steps. - */ - class RuntimePredictor { - - public: - - using clock = CycleClock; - - using duration = clock::duration; - - enum { MAX_LEVELS = 100 }; - - private: - - /** - * The number of samples recorded per task level. - */ - std::array samples; - - /** - * The current estimates of execution times of tasks. 
- */ - std::array times; - - public: - - RuntimePredictor(unsigned numWorkers = std::thread::hardware_concurrency()) { - // reset number of collected samples - samples.fill(0); - - // initialize time estimates - times.fill(duration::zero()); - - // initialize execution times up to a given level - for(int i=0; i= MAX_LEVELS) return duration::zero(); - return times[level]; - } - - /** - * Update the predictions for a level. - */ - void registerTime(std::size_t level, const duration& time) { - - // update matching level - updateTime(level,time); - - // update higher levels (with reduced weight) - auto smallerTime = time / 2; - auto largerTime = time * 2; - for(std::size_t d = 1; d < 5; d++) { - - // update higher element - if (d <= level) { - updateTime(level-d,largerTime); - } - - // update smaller element - if (level+d < MAX_LEVELS) { - updateTime(level+d,smallerTime); - } - - // update parameters - smallerTime = smallerTime / 2; - largerTime = largerTime * 2; - } - - } - - /** - * Enable the printing of the predictor state. - */ - friend std::ostream& operator<<(std::ostream& out, const RuntimePredictor& pred) { - out << "Predictions:\n"; - for(int i = 0; i - inline RuntimePredictor& getRuntimePredictor() { - static thread_local RuntimePredictor predictor = RuntimePredictor(); - return predictor; - } - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/task_id.h b/vendor/allscale/api/core/impl/reference/task_id.h deleted file mode 100644 index 42a86b3e5..000000000 --- a/vendor/allscale/api/core/impl/reference/task_id.h +++ /dev/null @@ -1,257 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/assert.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * The path part of a task ID. 
The path is the part of and ID addressing - * a certain sub-task of a decomposable task. - */ - class TaskPath { - - using path_t = std::uint64_t; - using length_t = std::uint8_t; - - path_t path; - length_t length; - - TaskPath(path_t path, length_t length) : path(path), length(length) {} - - public: - - TaskPath() = default; - - static TaskPath root() { - return TaskPath{0,0}; - } - - bool isRoot() const { - return length == 0; - } - - path_t getPath() const { - return path; - } - - length_t getLength() const { - return length; - } - - bool operator==(const TaskPath& other) const { - return path == other.path && length == other.length; - } - - bool operator!=(const TaskPath& other) const { - return !(*this == other); - } - - bool operator<(const TaskPath& other) const { - // get common prefix length - auto min_len = std::min(length, other.length); - - auto pA = path >> (length - min_len); - auto pB = other.path >> (other.length - min_len); - - // lexicographical compare - if (pA == pB) { - return length < other.length; - } - - // compare prefix comparison - return pA < pB; - } - - bool isPrefixOf(const TaskPath& other) const { - return length < other.length && (path == other.path >> (other.length - length)); - } - - TaskPath getLeftChildPath() const { - assert_lt((std::size_t)length,sizeof(path)*8); - auto res = *this; - res.path = res.path << 1; - ++res.length; - return res; - } - - TaskPath getRightChildPath() const { - auto res = getLeftChildPath(); - res.path = res.path + 1; - return res; - } - - TaskPath& descentLeft() { - path = path << 1; - return *this; - } - - TaskPath& descentRight() { - descentLeft(); - path += 1; - return *this; - } - - // --- path iterator support --- - - enum Direction { - Left = 0, Right = 1 - }; - - - class path_iterator : public std::iterator { - - path_t path; - length_t pos; - Direction cur; - - path_iterator(path_t path, length_t pos, Direction cur) - : path(path), pos(pos), cur(cur) {} - - public: - - static path_iterator 
begin(path_t path, length_t length) { - if (length == 0) return end(path); - return path_iterator( path, length, Direction((path >> (length-1)) % 2) ); - } - - static path_iterator end(path_t path) { - return path_iterator( path, 0, Left ); - } - - bool operator==(const path_iterator& other) const { - return pos == other.pos && path == other.path; - } - - bool operator!=(const path_iterator& other) const { - return !(*this == other); - } - - const Direction& operator*() const { - return cur; - } - - path_iterator& operator++() { - --pos; - if (pos==0) return *this; // we have reached the end - cur = Direction((path >> (pos-1)) % 2); - return *this; - } - - }; - - path_iterator begin() const { - return path_iterator::begin(path,length); - } - - path_iterator end() const { - return path_iterator::end(path); - } - - - // --- print support --- - - friend std::ostream& operator<<(std::ostream& out, const TaskPath& path) { - for(const auto& cur : path) { - out << "." << cur; - } - return out; - } - - }; - - /** - * An identifier of work items. Each work item is either a root-work-item, - * created by an initial prec call, or a child work item created through the - * splitting of a parent work item. The identifier is tracing this parent-child - * relationship. - * - * E.g. 
parent work item ID: - * - * T-12.0.1.0.1 - * - * child work items: - * - * T-12.0.1.0.1.0 and WI-12.0.1.0.1.1 - * - */ - class TaskID { - - std::uint64_t id; - TaskPath path; - - public: - - TaskID() = default; - - TaskID(std::uint64_t id) : id(id), path(TaskPath::root()) {} - - TaskID(std::uint64_t id, const TaskPath& path) - : id(id), path(path) {} - - - // -- observers -- - - std::uint64_t getRootID() const { - return id; - } - - const TaskPath& getPath() const { - return path; - } - - auto getDepth() const { - return path.getLength(); - } - - // -- utility functions -- - - bool operator==(const TaskID& other) const { - return id == other.id && path == other.path; - } - - bool operator!=(const TaskID& other) const { - return !(*this == other); - } - - bool operator<(const TaskID& other) const { - // check id - if (id < other.id) return true; - if (id > other.id) return false; - - // compare the paths - return path < other.path; - } - - bool isParentOf(const TaskID& child) const { - return id == child.id && path.isPrefixOf(child.path); - } - - TaskID getLeftChild() const { - return TaskID{ id, path.getLeftChildPath() }; - } - - TaskID getRightChild() const { - return TaskID{ id, path.getRightChildPath() }; - } - - - friend std::ostream& operator<<(std::ostream& out, const TaskID& id) { - return out << "T-" << id.id << id.path; - } - - }; - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/treeture.h b/vendor/allscale/api/core/impl/reference/treeture.h deleted file mode 100644 index a3b5d16bf..000000000 --- a/vendor/allscale/api/core/impl/reference/treeture.h +++ /dev/null @@ -1,3031 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __linux__ - #include -#endif - -#include "allscale/utils/assert.h" -#include 
"allscale/utils/bitmanipulation.h" - -#include "allscale/api/core/impl/reference/lock.h" -#include "allscale/api/core/impl/reference/profiling.h" -#include "allscale/api/core/impl/reference/queue.h" -#include "allscale/api/core/impl/reference/runtime_predictor.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - // ------------------------------------- Declarations ----------------------------------------- - - /** - * The actual treeture, referencing the computation of a value. - */ - template - class treeture; - - /** - * A treeture not yet released to the runtime system for execution. - */ - template - class unreleased_treeture; - - /** - * A handle for a lazily constructed unreleased treeture. This intermediate construct is utilized - * for writing templated code that can be optimized to overhead-less computed values and to facilitate - * the support of the sequence combinator. - */ - template - class lazy_unreleased_treeture; - - /** - * A reference to a task to synchronize upon it. 
- */ - class task_reference; - - /** - * A class to model task dependencies - */ - template - class dependencies; - - - - // --------------------------------------------------------------------------------------------- - // Internal Forward Declarations - // --------------------------------------------------------------------------------------------- - - - class TaskBase; - - template - class Task; - - - // --------------------------------------------------------------------------------------------- - // Debugging - // --------------------------------------------------------------------------------------------- - - - // -- Declarations -- - - const bool REFERENCE_RUNTIME_DEBUG = false; - - inline std::mutex& getLogMutex() { - static std::mutex m; - return m; - } - - #define LOG(MSG) \ - { \ - if (REFERENCE_RUNTIME_DEBUG) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - const bool DEBUG_SCHEDULE = false; - - #define LOG_SCHEDULE(MSG) \ - { \ - if (DEBUG_SCHEDULE) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - const bool DEBUG_TASKS = false; - - #define LOG_TASKS(MSG) \ - { \ - if (DEBUG_TASKS) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - - - // ----------------------------------------------------------------- - // Monitoring (for Debugging) - // ----------------------------------------------------------------- - - - const bool MONITORING_ENABLED = false; - - namespace monitoring { - - enum class EventType { - Run, RunDirect, Split, Wait, DependencyWait - }; - - struct Event { - - EventType type; - - const TaskBase* task; - - TaskID taskId; - - bool operator==(const Event& 
other) const { - return other.type == type && other.task == task && other.taskId == taskId; - } - - friend std::ostream& operator<<(std::ostream& out, const Event& e); - }; - - - class ThreadState { - - using guard = std::lock_guard; - - std::thread::id thread_id; - - std::mutex lock; - - std::vector eventStack; - - public: - - ThreadState() : thread_id(std::this_thread::get_id()) { - guard g(getStateLock()); - getStates().push_back(this); - } - - ~ThreadState() { - assert_true(eventStack.empty()); - } - - void pushEvent(const Event& e) { - guard g(lock); - eventStack.push_back(e); - } - - void popEvent(__allscale_unused const Event& e) { - guard g(lock); - assert_eq(e,eventStack.back()); - eventStack.pop_back(); - } - - void dumpState(std::ostream& out) { - guard g(lock); - out << "\nThread: " << thread_id << "\n"; - out << "\tStack:\n"; - for(const auto& cur : eventStack) { - out << "\t\t" << cur << "\n"; - } - out << "\t\t -- top of stack --\n"; - out << "\n"; - } - - static void dumpStates(std::ostream& out) { - // lock states - std::lock_guard g(getStateLock()); - - // provide a hint if there is no information - if (getStates().empty()) { - out << "No thread states recorded."; - if (!MONITORING_ENABLED) { - out << " You can enable it by setting the MONITORING_ENABLED flag in the code base."; - } - out << "\n"; - return; - } - - // print all current states - for(const auto& cur : getStates()) { - cur->dumpState(out); - } - } - - private: - - static std::mutex& getStateLock() { - static std::mutex state_lock; - return state_lock; - } - - static std::vector& getStates() { - static std::vector states; - return states; - } - - }; - - thread_local static ThreadState tl_thread_state; - - - struct Action { - - bool active; - Event e; - - Action() : active(false) {} - - Action(const Event& e) : active(true), e(e) { - // register action - tl_thread_state.pushEvent(e); - } - - Action(Action&& other) : active(other.active), e(other.e) { - other.active = false; - } - - 
Action(const Action&) = delete; - - Action& operator=(const Action&) = delete; - Action& operator=(Action&&) = delete; - - ~Action() { - if (!active) return; - // remove action from action stack - tl_thread_state.popEvent(e); - } - - }; - - inline Action log(EventType type, const TaskBase* task) { - assert_true(type != EventType::DependencyWait); - if (!MONITORING_ENABLED) return {}; - return Event{type,task,TaskID()}; - } - - inline Action log(EventType type, const TaskID& task) { - assert_true(type == EventType::DependencyWait); - if (!MONITORING_ENABLED) return {}; - return Event{type,nullptr,task}; - } - - } - - - - - // --------------------------------------------------------------------------------------------- - // Task Dependency Manager - // --------------------------------------------------------------------------------------------- - - template - class TaskDependencyManager { - - // dependencies are stored in a linked list - struct Entry { - TaskBase* task; - Entry* next; - }; - - using cell_type = std::atomic; - - enum { num_entries = 1<<(max_depth+1) }; - - // an epoch counter to facilitate re-use - std::atomic epoch; - - // the container for storing task dependencies, pointer tagging is used to test for completeness - cell_type data[num_entries]; - - public: - - TaskDependencyManager(std::size_t epoch = 0) : epoch(epoch) { - for(auto& cur : data) cur = nullptr; - } - - ~TaskDependencyManager() { - for(auto& cur : data) { - if (!isDone(cur)) { - // psalz: MSVC 2015 doesn't like deleting atomic pointers - // directly - delete cur.load(); - cur = nullptr; - } - } - } - - TaskDependencyManager(const TaskDependencyManager&) = delete; - TaskDependencyManager(TaskDependencyManager&&) = delete; - - TaskDependencyManager& operator=(const TaskDependencyManager&) = delete; - TaskDependencyManager& operator=(TaskDependencyManager&&) = delete; - - std::size_t getEpoch() const { - return epoch.load(); - } - - void startEpoch(std::size_t newEpoch) { - // make sure 
there is a change - assert_ne(epoch.load(),newEpoch); - - // re-set state - epoch = newEpoch; - for(auto& cur : data) { - // there should not be any dependencies left - assert_true(cur == nullptr || isDone(cur)); - - // reset dependencies - cur = nullptr; - } - } - - - /** - * Adds a dependency between the given tasks such that - * task x depends on the completion of the task y. - */ - void addDependency(TaskBase* x, const TaskPath& y); - - void markComplete(const TaskPath& task); - - bool isComplete(const TaskPath& path) const { - return isDone(data[getPosition(path)]); - } - - private: - - std::size_t getPosition(const TaskPath& path) const { - - // get length and path - auto l = path.getLength(); - auto p = path.getPath(); - - // limit length to max_depth - if (l > max_depth) { - p = p >> (l - max_depth); // effective path - l = max_depth; // effective depth - } - - // compute result - return (1 << l) | p; - } - - bool isDone(const Entry* ptr) const { - // if the last bit is set, the task already finished - return (intptr_t)(ptr) & 0x1; - } - - }; - - - - // --------------------------------------------------------------------------------------------- - // Task Family - // --------------------------------------------------------------------------------------------- - - - /** - * A task family is a collection of tasks descending from a common (single) ancestor. - * Task families are created by root-level prec operator calls, and manage the dependencies - * of all its members. - * - * Tasks being created through recursive or combine calls are initially not members of - * any family, but may get adapted (by being the result of a split operation). 
- */ - class TaskFamily { - - // TODO: make task dependency manager depth target system dependent - - using DependencyManager = TaskDependencyManager<6>; - - // the manager of all dependencies on members of this family - DependencyManager dependencies; - - // a flag determining whether this is a top-level task family - // (it is not created nested by a treeture but by the main thread) - bool top_level; - - public: - - /** - * Creates a new family, using a new ID. - */ - TaskFamily(bool top_level = false) : dependencies(getNextID()), top_level(top_level) {} - - /** - * Obtain the family ID. - */ - std::size_t getId() const { - return dependencies.getEpoch(); - } - - /** - * Tests whether this task family is a top-level family (not nested). - */ - bool isTopLevel() const { - return top_level; - } - - /** - * Tests whether the given sub-task is complete. - */ - bool isComplete(const TaskPath& path) const { - return dependencies.isComplete(path); - } - - /** - * Register a dependency ensuring that a task x is depending on a task y. - */ - void addDependency(TaskBase* x, const TaskPath& y) { - dependencies.addDependency(x,y); - } - - /** - * Mark the given task as being finished. - */ - void markDone(const TaskPath& x) { - dependencies.markComplete(x); - } - - /** - * A family ID generator. - */ - static unsigned getNextID() { - static std::atomic counter(0); - return ++counter; - } - - }; - - - // the pointer type to reference task families - using TaskFamilyPtr = TaskFamily*; - - /** - * A manager keeping track of created families. 
- */ - class TaskFamilyManager { - - SpinLock lock; - - std::vector> families; - - public: - - TaskFamilyPtr getFreshFamily(bool topLevel) { - std::lock_guard lease(lock); - - // TODO: replace this by a re-use based solution - - // gradually drain old family references - /* - if (families.size() > 20000) { - families.erase(families.begin(),families.begin() + families.size()/2); - } - */ - - // create a new family - families.push_back(std::make_unique(topLevel)); - return families.back().get(); - } - - }; - - - // a factory for a new task family - inline TaskFamilyPtr createFamily(bool topLevel = false) { - static TaskFamilyManager familyManager; - return familyManager.getFreshFamily(topLevel); - } - - - - // --------------------------------------------------------------------------------------------- - // task reference - // --------------------------------------------------------------------------------------------- - - - /** - * A reference to a task utilized for managing task synchronization. Tasks may - * only be synchronized on if they are members of a task family. 
- */ - class task_reference { - - // a weak reference to a task's family - TaskFamilyPtr family; - - TaskPath path; - - task_reference(const TaskFamilyPtr& family, const TaskPath& path) - : family(family), path(path) {} - - public: - - task_reference() : family(nullptr), path(TaskPath::root()) {} - - task_reference(const TaskBase& task); - - task_reference(const task_reference&) = default; - - task_reference(task_reference&& other) : family(other.family), path(other.path) { - other.family = nullptr; - } - - task_reference& operator=(const task_reference& other) = default; - - task_reference& operator=(task_reference&& other) { - family = other.family; - path = other.path; - other.family = nullptr; - return *this; - } - - bool isDone() const { - return (!family || family->isComplete(path)); - } - - bool valid() const { - return family; - } - - void wait() const; - - task_reference getLeft() const { - return task_reference ( family, path.getLeftChildPath() ); - } - - task_reference getRight() const { - return task_reference ( family, path.getRightChildPath() ); - } - - task_reference& descentLeft() { - path.descentLeft(); - return *this; - } - - task_reference& descentRight() { - path.descentRight(); - return *this; - } - - // -- implementation details -- - - TaskFamilyPtr getFamily() const { - return family; - } - - const TaskPath& getPath() const { - return path; - } - - }; - - - template - struct fixed_sized {}; - - struct dynamic_sized {}; - - /** - * A class to aggregate task dependencies. - */ - template - class dependencies; - - /** - * A specialization for empty task dependencies. - */ - template<> - class dependencies> { - - public: - - bool empty() const { - return true; - } - - std::size_t size() const { - return 0; - } - - const task_reference* begin() const { - return nullptr; - } - - const task_reference* end() const { - return nullptr; - } - - }; - - - /** - * A specialization for fixed-sized task dependencies. 
- */ - template - class dependencies> { - - template - friend dependencies> concat(const dependencies>&, const dependencies>&); - - std::array list; - - public: - - template - dependencies(const Args& ... args) : list({{args...}}) {} - - dependencies(const dependencies&) = default; - dependencies(dependencies&&) = default; - - dependencies& operator=(const dependencies&) = default; - dependencies& operator=(dependencies&&) = default; - - bool empty() const { - return Size == 0; - } - - std::size_t size() const { - return Size; - } - - const task_reference* begin() const { - return &(list[0]); - } - - const task_reference* end() const { - return begin()+Size; - } - - }; - - /** - * Enables the concatentation of two fixed-sized dependencies lists. - */ - template - dependencies> concat(const dependencies>& a, const dependencies>& b) { - dependencies> res; - for(std::size_t i=0; i - class dependencies { - - using list_type = std::vector; - - list_type* list; - - public: - - dependencies() : list(nullptr) {} - - dependencies(std::vector&& deps) - : list(new list_type(std::move(deps))) {} - - dependencies(const dependencies&) = delete; - - dependencies(dependencies&& other) : list(other.list){ - other.list = nullptr; - } - - ~dependencies() { - delete list; - } - - dependencies& operator=(const dependencies&) = delete; - - dependencies& operator=(dependencies&& other) { - if (list == other.list) return *this; - delete list; - list = other.list; - other.list = nullptr; - return *this; - } - - bool empty() const { - return list == nullptr; - } - - std::size_t size() const { - return (list) ? list->size() : 0; - } - - void add(const task_reference& ref) { - if (!list) list = new list_type(); - list->push_back(ref); - } - - const task_reference* begin() const { - return (list) ? &list->front() : nullptr; - } - - const task_reference* end() const { - return (list) ? 
(&list->back()) + 1 : nullptr; - } - - }; - - - // --------------------------------------------------------------------------------------------- - // promise - // --------------------------------------------------------------------------------------------- - - - /** - * A promise, forming the connection between a task and a treeture - * waiting for the task's result. - */ - template - class Promise { - - // a marker for delivered values - std::atomic ready; - - // the delivered value - T value; - - public: - - Promise() : ready(false) {} - - Promise(const T& value) - : ready(true), value(value) {} - - bool isReady() const { - return ready; - } - - const T& getValue() const { - return value; - } - - void setValue(const T& newValue) { - value = newValue; - ready = true; - } - }; - - /** - * A specialization for void promises. - */ - template<> - class Promise { - - // a marker for delivered promises - std::atomic ready; - - public: - - Promise(bool ready = false) - : ready(ready) {} - - bool isReady() const { - return ready; - } - - void setReady() { - ready = true; - } - - }; - - - template - using PromisePtr = std::shared_ptr>; - - - // --------------------------------------------------------------------------------------------- - // Tasks - // --------------------------------------------------------------------------------------------- - - - // the RT's interface to a task - class TaskBase { - - public: - - enum class State { - New, // < this task has been created, but not processed by a worker yet - Blocked, // < this task has unfinished dependencies - Ready, // < this task may be processed (scheduled in work queues) - Running, // < this task is running - Aggregating, // < this split task is aggregating results (skipped if not split) - Done // < this task is completed - }; - - friend std::ostream& operator<<(std::ostream& out, const State& state) { - switch(state) { - case State::New: return out << "New"; - case State::Blocked: return out << "Blocked"; - case 
State::Ready: return out << "Ready"; - case State::Running: return out << "Running"; - case State::Aggregating: return out << "Aggregating"; - case State::Done: return out << "Done"; - } - return out << "Invalid"; - } - - private: - - // the family this task belongs to, if null, this task is an orphan task. - TaskFamilyPtr family; - - // the position of this task within its family - TaskPath path; - - // A cached version of the task ID. This id - // is only valid if this task is not an orphan - TaskID id; - - // the current state of this task - std::atomic state; - - /** - * the number of active dependencies keeping this object alive and - * blocking its execution. Those dependencies include - * +1 for the unreleased treeture, subtracted once the task is released - * +1 for the parent, released once the parent is no longer interested in this task - * +1 for each task this task is waiting for, thus for each dependency - * - * Initially, there are 2 dependencies -- one for the parent, one for the release. - * - * Actions: - * 1 ... this task is started - * 0 ... this task is destroyed - */ - std::atomic num_active_dependencies; - - // indicates whether this task can be split - bool splitable; - - // split task data - TaskBase* left; - TaskBase* right; - - // for the mutation from a simple to a split task - TaskBase* substitute; - - // TODO: get rid of this - bool parallel; - - // for the processing of split tasks - TaskBase* parent; // < a pointer to the parent to be notified upon completion - std::atomic alive_child_counter; // < the number of active child tasks - - // a flag to remember that this task got a substitute, even after the - // substitute got cut lose - std::atomic substituted; - - public: - - TaskBase(bool done = false) - : family(), path(TaskPath::root()), id(TaskFamily::getNextID()), - state(done ? State::Done : State::New), - // one initial control flow dependency, released by treeture release - num_active_dependencies(done ? 
1 : 2), - splitable(false), - left(nullptr), right(nullptr), substitute(nullptr), - parallel(false), parent(nullptr), - substituted(false) { - - LOG_TASKS( "Created " << *this ); - - // register this task - if (MONITORING_ENABLED) registerTask(*this); - } - - TaskBase(TaskBase* left, TaskBase* right, bool parallel) - : family(), - path(TaskPath::root()), id(TaskFamily::getNextID()), - state(State::New), - // one initial control flow dependency, released by treeture release - num_active_dependencies(2), - splitable(false), - left(left), right(right), substitute(nullptr), - parallel(parallel), - parent(nullptr), alive_child_counter(0), - substituted(false) { - - LOG_TASKS( "Created " << *this ); - assert(this->left); - assert(this->right); - - // fix the parent pointer - this->left->parent = this; - this->right->parent = this; - - // register this task - if (MONITORING_ENABLED) registerTask(*this); - } - - protected: - - // make the destructor private, such that only this class can destroy itself - virtual ~TaskBase() { - if (MONITORING_ENABLED) unregisterTask(*this); - LOG_TASKS( "Destroying Task " << *this ); - assert_true(isDone()) << getId() << " - " << getState(); - }; - - public: - - // -- observers -- - - const TaskFamilyPtr& getTaskFamily() const { - return family; - } - - const TaskPath& getTaskPath() const { - return path; - } - - TaskID getId() const { - return id; - } - - bool isOrphan() const { - return !family; - } - - std::size_t getDepth() const { - return path.getLength(); - } - - State getState() const { - // the substitute takes over the control of the state - if (substitute) return substitute->state; - return state; - } - - // each implementation is required to provide a runtime predictor - virtual RuntimePredictor& getRuntimePredictor() const = 0; - - // -- mutators -- - - void addDependency(const task_reference& ref) { - addDependencies(&ref,&ref+1); - } - - template - void addDependencies(const Iter& begin, const Iter& end) { - - // ignore 
empty dependencies - if (begin == end) return; - - // we must still be in the new state - assert_eq(getState(),State::New); - - // this task must not yet be started nor must the parent be lost - assert_le(2,num_active_dependencies); - - // increase the number of active dependencies - num_active_dependencies += (int)(end - begin); - - // register dependencies - for(auto it = begin; it != end; ++it) { - const auto& cur = *it; - - // filter out already completed tasks (some may be orphans) - if (cur.isDone()) { - // notify that one dependency more is completed - dependencyDone(); - // continue with next - continue; - } - - // add dependency - assert_true(cur.getFamily()); - cur.getFamily()->addDependency(this,cur.getPath()); - } - - } - - void adopt(const TaskFamilyPtr& family, const TaskPath& path = TaskPath()) { - // check that this task is not member of another family - assert_true(isOrphan()) << "Can not adopt a member of another family."; - - // check whether there is an actual family - if (!family) return; - - // join the family - this->family = family; - this->path = path; - - // update the id - this->id = TaskID(family->getId(),path); - - // mark as complete, if already complete - if(isDone()) family->markDone(path); - - // propagate adoption to descendants - if (substitute) substitute->adopt(family,path); - if (left) left->adopt(family, path.getLeftChildPath()); - if (right) right->adopt(family, path.getRightChildPath()); - } - - - // -- state transitions -- - - // New -> Blocked - void start(); - - // Blocked -> Ready transition is triggered by the last dependency - - // Ready -> Running - finish() -> Done - void run() { - - // log this event - auto action = monitoring::log(monitoring::EventType::Run, this); - - // process substituted tasks - if (substituted) { - // there is nothing to do - return; - } - - - LOG_TASKS( "Running Task " << *this ); - - // check that it is allowed to run - assert_eq(state, State::Ready); - assert_eq(1,num_active_dependencies); 
- - // update state - setState(State::Running); - - // process split tasks - if (isSplit()) { // if there is a left, it is a split task - - // check some assumptions - assert(left && right); - - State lState = left->state; - State rState = right->state; - - assert(lState == State::New || lState == State::Done); - assert(rState == State::New || rState == State::Done); - - // run task sequentially if requested - if (!parallel) { - - // TODO: implement sequential execution dependency based - alive_child_counter = 2; - - // process left first - if (lState != State::Done) { - left->start(); - } else { - // notify that this child is done - childDone(*left); - } - - // right child is started by childDone once left is finished - - // done - return; - - } - - // count number of sub-tasks to be started - assert_eq(0,alive_child_counter); - - // check which child tasks need to be started - if (lState == State::New && rState == State::New) { - - // both need to be started - alive_child_counter = 2; - left->start(); - right->start(); - - } else if (lState == State::New) { - - // only left has to be started - alive_child_counter = 1; - left->start(); - - } else if (rState == State::New) { - - // only left has to be started - alive_child_counter = 1; - right->start(); - - } else { - - // perform reduction immediately since sub-tasks are done - finish(); - - // done - return; - } - - // processing complete - - } else { - - // run computation - execute(); - - // finish task - finish(); - - } - } - - // Ready -> Split (if supported, otherwise remains Ready) - virtual bool split() { - // by default, no splitting is supported - assert_fail() << "This should not be reachable!"; - return false; - } - - // wait for the task completion - void wait(); - - bool isDone() const { - // simply check the state of this task - return state == State::Done; - } - - const TaskBase* getLeft() const { - // forward call to substitute if present - if (substitute) return substitute->getLeft(); - return 
left; - } - - const TaskBase* getRight() const { - // forward call to substitute if present - if (substitute) return substitute->getRight(); - return right; - } - - bool isSplitable() const { - return splitable; - } - - bool isSplit() const { - return (bool)left; - } - - bool isSubstituted() const { - return substituted; - } - - bool isReady() const { - if (substitute) return substitute->isReady(); - return state == State::Ready; - } - - void dependencyDone(); - - protected: - - /** - * A hook to define the operations to be conducted by this - * task instance. This function will only be triggered - * for non-split tasks. - */ - virtual void execute() =0; - - /** - * A hook to define post-operation operations triggered after - * the completion of this task or the completion of its child - * tasks. It should be utilized to retrieve results from - * substitutes or child tasks and aggregate those. - */ - virtual void aggregate() =0; - - void setSplitable(bool value = true) { - splitable = value && getDepth() < 60; - } - - void setSubstitute(TaskBase* newSub) { - - // must only be set once! 
- assert_false(substitute); - - // can only happen if this task is in blocked or ready state - assert_true(state == State::Blocked || state == State::Ready) - << "Actual state: " << state; - - // and the substitute must be valid - assert_true(newSub); - - // the substitute must be new - assert_true(newSub->state == State::New || newSub->state == State::Done); - - // link substitute -- with this responsibilities are transfered - substitute = newSub; - - // connect substitute to parent - substitute->parent = this; - - // remember that a substitute has been assigned - substituted = true; - - // if the split task is done, this one is done - if (substitute->isDone()) { - - // update state - if (state == State::Blocked) setState(State::Ready); - - // pass through running - setState(State::Running); - - // finish this task - finish(); - - // done - return; - } - - // adapt substitute - substitute->adopt(this->family, this->path); - - // and update this state to ready - if (state == State::Blocked) setState(State::Ready); - - // since the substitute may be processed any time, this may finish - // any time => thus it is in the running state - setState(State::Running); - - // start the substitute - substitute->start(); - - } - - private: - - bool isValidTransition(State from, State to) { - return (from == State::New && to == State::Blocked ) || - (from == State::Blocked && to == State::Ready ) || - (from == State::Ready && to == State::Running ) || - (from == State::Running && to == State::Aggregating ) || - (from == State::Aggregating && to == State::Done ) ; - } - - void setState(State newState) { - - // check correctness of state transitions - assert_true(isValidTransition(state,newState)) - << "Illegal state transition from " << state << " to " << newState; - - // make sure that the task is not released with active dependencies - assert_true(newState != State::Ready || num_active_dependencies == 1 || substituted) - << "Active dependencies: " << num_active_dependencies; - 
- // update the state - state = newState; - LOG_TASKS( "Updated state: " << *this ); - } - - void childDone(const TaskBase& child) { - - // this task must not be done yet - assert_ne(state,State::Done); - - // check whether it is the substitute - if (substitute == &child) { - - // check state of this task - assert_true(State::Ready == state || State::Running == state) - << "Actual state: " << state; - - // log state change - LOG_TASKS( "Substitute " << *substitute << " of " << *this << " done"); - - // trigger completion of task - finish(); - return; - } - - // make sure this task is still running - assert_eq(State::Running, state) - << "\tis substitute: " << (substitute == &child) << "\n" - << "\tis child left: " << (left == &child) << "\n" - << "\tis child right: " << (right == &child) << "\n"; - - // process a split-child - LOG_TASKS( "Child " << child << " of " << *this << " done" ); - - // if this is a sequential node, start next child - if (!parallel && &child == left) { - - // continue with the right child - if (right->getState() != State::Done) { - right->start(); - } else { - // notify that the right child is also done - childDone(*right); - } - - } - - // decrement active child count - unsigned old_child_count = alive_child_counter.fetch_sub(1); - - // log alive counter - LOG_TASKS( "Child " << child << " of " << *this << " -- alive left: " << (old_child_count - 1) ); - - // check whether this was the last child - if (old_child_count != 1) return; - - // the last child finished => finish this task - finish(); - - // LOG_TASKS( "Child " << child << " of " << *this << " done - processing complete" ); - } - - void parentDone() { - - // check that there is a parent - assert_true(parent); - - // signal that one more dependency is satisfied - dependencyDone(); - - } - - // Running -> Aggregating -> Done - void finish() { - - LOG_TASKS( "Finishing task " << *this ); - - // check precondition - assert_true(state == State::Running) - << "Actual State: " << state 
<< "\nTask: " << *this; - - - // update state to aggregation - setState(State::Aggregating); - - // log aggregation step - LOG( "Aggregating task " << *this ); - - // aggregate result (collect results) - aggregate(); - - // a tool to release dependent tasks - auto release = [](TaskBase* task) { - assert_true(!task || task->isDone()); - if (!task) return; - task->parentDone(); - }; - - // cut lose children - release(left); - release(right); - - // cut lose substitutes - release(substitute); - - // log completion - LOG( "Aggregating task " << *this << " complete" ); - - // job is done - setState(State::Done); - - // copy parent pointer to stack, since the markDone may release this task - TaskBase* locParent = parent; - - // inform the family that the job is done - if (!parent || parent->substitute != this) { - // only due this if you are not the substitute - if (family) family->markDone(path); - - // if there is no parent, don't wait for it to signal its release - if (!parent) dependencyDone(); - } - - // notify parent - if (locParent) { - - // notify parents - parent->childDone(*this); - - } - - } - - // -- support printing of tasks for debugging -- - - friend std::ostream& operator<<(std::ostream& out, const TaskBase& task) { - - // if substituted, print the task and its substitute - if (task.substitute) { - out << task.getId() << " -> " << *task.substitute; - return out; - } - - // if split, print the task and its children - if (task.isSplit()) { - out << task.getId() << " : " << task.state; - if (task.state == State::Done) return out; - - out << " = " << (task.parallel ? 
"parallel" : "sequential") << " ["; - if (task.left) out << *task.left; else out << "nil"; - out << ","; - if (task.right) out << *task.right; else out << "nil"; - out << "]"; - return out; - } - - // in all other cases, just print the id - out << task.getId() << " : " << task.state; - - // get the total number of dependencies - std::size_t numDependencies = task.num_active_dependencies; - - // remove release dependency - if (task.state == State::New) numDependencies -= 1; - - // remove delete dependency - numDependencies -= 1; - - // print number of task dependencies - if (task.state <= State::Blocked) { - out << " waiting for " << numDependencies << " task(s)"; - } - - return out; - } - - template - friend class SplitableTask; - - // --- debugging --- - - private: - - static std::mutex& getTaskRegisterLock() { - static std::mutex lock; - return lock; - } - - static std::set& getTaskRegister() { - static std::set instances; - return instances; - } - - static void registerTask(const TaskBase& task) { - std::lock_guard g(getTaskRegisterLock()); - getTaskRegister().insert(&task); - } - - static void unregisterTask(const TaskBase& task) { - std::lock_guard g(getTaskRegisterLock()); - auto pos = getTaskRegister().find(&task); - assert_true(pos!=getTaskRegister().end()); - getTaskRegister().erase(pos); - } - - public: - - static void dumpAllTasks(std::ostream& out) { - std::lock_guard g(getTaskRegisterLock()); - - // check whether monitoring is enabled - if (!MONITORING_ENABLED) { - out << " -- task tracking disabled, enable by setting MONITORING_ENABLED to true --\n"; - return; - } - - // list active tasks - std::cout << "List of all tasks:\n"; - for(const auto& cur : getTaskRegister()) { - std::cout << "\t" << *cur << "\n"; - } - } - - }; - - - // ----------- Task Dependency Manager Implementations --------------- - - template - void TaskDependencyManager::addDependency(TaskBase* x, const TaskPath& y) { - - // locate entry - std::size_t pos = getPosition(y); - - // 
load epoch - auto curEpoch = epoch.load(); - - // load the head - Entry* head = data[pos].load(); - - // check whether we are still in the same epoch - if (curEpoch != epoch.load()) { - // the epoch has changed, the previous is gone - x->dependencyDone(); - return; - } - - // check whether this task is already completed - if (isDone(head)) { - // signal that this dependency is done - x->dependencyDone(); - return; - } - - // insert element - Entry* entry = new Entry(); - entry->task = x; - entry->next = head; - - // update entry pointer lock-free - while (!data[pos].compare_exchange_weak(entry->next,entry)) { - - // check whether the task has been completed in the meanwhile - if (isDone(entry->next)) { - delete entry; - // signal that this dependency is done - x->dependencyDone(); - return; - } - - // otherwise, repeat until it worked - } - - // successfully inserted - } - - template - void TaskDependencyManager::markComplete(const TaskPath& task) { - - // ignore tasks that are too small - if (task.getLength() > max_depth) return; - - // mark as complete and obtain head of depending list - auto pos = getPosition(task); - Entry* cur = data[pos].exchange((Entry*)0x1); - - // do not process list twice (may be called multiple times due to substitutes) - if (isDone(cur)) return; - - // signal the completion of this task - while(cur) { - - // signal a completed dependency - cur->task->dependencyDone(); - - // move on to next entry - Entry* next = cur->next; - delete cur; - cur = next; - } - - // and its children - if (pos >= num_entries/2) return; - markComplete(task.getLeftChildPath()); - markComplete(task.getRightChildPath()); - } - - // ------------------------------------------------------------------- - - - - // ------------------------- Task Reference -------------------------- - - inline task_reference::task_reference(const TaskBase& task) - : family(task.getTaskFamily()), path(task.getTaskPath()) { - assert_false(task.isOrphan()) << "Unable to reference an orphan 
task!"; - } - - // ------------------------------------------------------------------- - - - // a task computing a value of type T - template - class Task : public TaskBase { - - T value; - - mutable PromisePtr promise; - - public: - - Task() : TaskBase(), promise(nullptr) {} - - Task(const T& value) - : TaskBase(true), value(value), promise(nullptr) {} - - Task(TaskBase* left, TaskBase* right, bool parallel) - : TaskBase(left, right, parallel), promise(nullptr) {} - - - virtual ~Task(){}; - - const T& getValue() const { - assert_true(isDone()) << this->getState(); - return value; - } - - void setPromise(const PromisePtr& newPromise) const { - - // this task must not be started yet - assert_eq(State::New,this->getState()); - - // there must not be a previous promise - assert_false(promise); - - // register promise - promise = newPromise; - } - - protected: - - void execute() override { - value = computeValue(); - } - - void aggregate() override { - value = computeAggregate(); - if(promise) { - promise->setValue(value); - } - } - - virtual T computeValue() { - // the default does nothing - return value; - }; - - virtual T computeAggregate() { - // nothing to do by default - return value; - }; - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template<> - class Task : public TaskBase { - - mutable PromisePtr promise; - - public: - - Task(bool done = false) : TaskBase(done) {} - - Task(TaskBase* left, TaskBase* right, bool parallel) - : TaskBase(left,right,parallel) {} - - virtual ~Task(){}; - - void getValue() const { - } - - void setPromise(const PromisePtr& newPromise) const { - - // this task must not be started yet - assert_eq(State::New,this->getState()); - - // there must not be a previous promise - assert_false(promise); - - // register promise - promise = newPromise; - } - - protected: - - 
void execute() override { - computeValue(); - } - - void aggregate() override { - computeAggregate(); - if(promise) { - promise->setReady(); - } - } - - virtual void computeValue() {}; - - virtual void computeAggregate() {}; - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - - template< - typename Process, - typename R = std::result_of_t - > - class SimpleTask : public Task { - - Process task; - - public: - - SimpleTask(const Process& task) - : Task(), task(task) {} - - R computeValue() override { - return task(); - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - return reference::getRuntimePredictor(); - } - - }; - - - template< - typename Process, - typename Split, - typename R = std::result_of_t - > - class SplitableTask : public Task { - - Process task; - Split decompose; - - Task* subTask; - - public: - - SplitableTask(const Process& c, const Split& d) - : Task(), task(c), decompose(d), subTask(nullptr) { - // mark this task as one that can be split - TaskBase::setSplitable(); - } - - R computeValue() override { - // this should not be called if split - assert_false(subTask); - return task(); - } - - R computeAggregate() override { - // the aggregated value depends on whether it was split or not - return (subTask) ? 
subTask->getValue() : Task::computeAggregate(); - } - - bool split() override; - - virtual RuntimePredictor& getRuntimePredictor() const override { - return reference::getRuntimePredictor(); - } - - }; - - template - class SplitTask : public Task { - - const Task& left; - const Task& right; - - C merge; - - public: - - SplitTask(Task* left, Task* right, C&& merge, bool parallel) - : Task(left,right,parallel), - left(*left), - right(*right), - merge(merge) {} - - - R computeValue() override { - // should not be reached - assert_fail() << "Should always be split!"; - return {}; - } - - R computeAggregate() override { - return merge(left.getValue(),right.getValue()); - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template - class SplitTask : public Task { - public: - - SplitTask(TaskBase* left, TaskBase* right, bool parallel) - : Task(left,right,parallel) {} - - void computeValue() override { - // should not be reached - assert_fail() << "Should always be split!"; - } - - void computeAggregate() override { - // nothing to do - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template> - Task* make_split_task(Deps&& deps, Task* left, Task* right, C&& merge, bool parallel) { - Task* res = new SplitTask(left, right, std::move(merge), parallel); - res->addDependencies(deps.begin(), deps.end()); - return res; - } - - template - Task* make_split_task(Deps&& deps, TaskBase* left, TaskBase* right, bool parallel) { - Task* res = new SplitTask(left, right, parallel); - res->addDependencies(deps.begin(), deps.end()); - return res; - } - - - - - - // 
--------------------------------------------------------------------------------------------- - // Treetures - // --------------------------------------------------------------------------------------------- - - - namespace detail { - - /** - * A common base class for all treetures, providing common functionality. - */ - template - class treeture_base { - - template - friend class SplitableTask; - - protected: - - task_reference taskRef; - - PromisePtr promise; - - treeture_base() : promise() {} - - treeture_base(const Task& task) : promise(std::make_shared>()) { - - // make sure task has not been started yet - assert_eq(TaskBase::State::New, task.getState()); - - // register the promise - task.setPromise(promise); - - // also create task reference if available - if (!task.isOrphan()) { - taskRef = task_reference(task); - } - } - - treeture_base(PromisePtr&& promise) - : promise(std::move(promise)) { - - // make sure the promise is valid and set - assert_true(this->promise); - assert_true(this->promise->isReady()); - - } - - public: - - using value_type = T; - - treeture_base(const treeture_base&) = delete; - treeture_base(treeture_base&& other) = default; - - treeture_base& operator=(const treeture_base&) = delete; - treeture_base& operator=(treeture_base&& other) = default; - - void wait() const; - - bool isDone() const { - return !promise || promise->isReady(); - } - - bool isValid() const { - return (bool)promise; - } - - task_reference getLeft() const { - return getTaskReference().getLeft(); - } - - task_reference getRight() const { - return getTaskReference().getRight(); - } - - task_reference getTaskReference() const { - return taskRef; - } - - operator task_reference() const { - return getTaskReference(); - } - - }; - - } - - /** - * A treeture, providing a reference to the state of a task as well as to - * the computed value upon completion. 
- */ - template - class treeture : public detail::treeture_base { - - using super = detail::treeture_base; - - friend class unreleased_treeture; - - protected: - - treeture(const Task& task) : super(task) {} - - public: - - using treeture_type = treeture; - - treeture() {} - - treeture(const T& value) - : super(std::make_shared>(value)) {} - - treeture(const treeture&) = delete; - treeture(treeture&& other) = default; - - treeture& operator=(const treeture&) = delete; - treeture& operator=(treeture&& other) = default; - - const T& get() { - static const T defaultValue = T(); - if (!this->promise) return defaultValue; - super::wait(); - return this->promise->getValue(); - } - - }; - - /** - * A specialization of the general value treeture for the void type, exhibiting - * a modified signature for the get() member function. - */ - template<> - class treeture : public detail::treeture_base { - - using super = detail::treeture_base; - - friend class unreleased_treeture; - - protected: - - treeture(const Task& task) : super(task) {} - - public: - - treeture() : super() {} - - treeture(const treeture&) = delete; - treeture(treeture&& other) = default; - - treeture& operator=(const treeture&) = delete; - treeture& operator=(treeture&& other) = default; - - void get() { - wait(); - } - - }; - - - - template - bool SplitableTask::split() { - // do not split a second time - if (!TaskBase::isSplitable()) return false; - - assert_true(TaskBase::State::Blocked == this->state || TaskBase::State::Ready == this->state) - << "Actual state: " << this->state; - - // decompose this task - Task* substitute = decompose().toTask(); - assert_true(substitute); - assert_true(substitute->state == TaskBase::State::New || substitute->state == TaskBase::State::Done); - - // record reference to sub-task - subTask = substitute; - - // mark as no longer splitable - TaskBase::setSplitable(false); - - // mutate to new task - Task::setSubstitute(substitute); - - // done - return true; - } - - - - // 
--------------------------------------------------------------------------------------------- - // Unreleased Treetures - // --------------------------------------------------------------------------------------------- - - namespace detail { - - template - struct done_task_to_treeture { - treeture operator()(const Task& task) { - return treeture(task.getValue()); - } - }; - - template<> - struct done_task_to_treeture { - treeture operator()(const Task&) { - return treeture(); - } - }; - } - - - /** - * A handle to a yet unreleased task. - */ - template - class unreleased_treeture { - - Task* task; - - public: - - using value_type = T; - - using treeture_type = treeture; - - unreleased_treeture(Task* task) - : task(task) {} - - unreleased_treeture(const unreleased_treeture&) =delete; - - unreleased_treeture(unreleased_treeture&& other) : task(other.task) { - other.task = nullptr; - } - - unreleased_treeture& operator=(const unreleased_treeture&) =delete; - - unreleased_treeture& operator=(unreleased_treeture&& other) { - std::swap(task,other.task); - return *this; - } - - ~unreleased_treeture() { - if(task) { - assert_fail() - << "Did you forget to release a treeture?"; - } - } - - treeture release() && { - - // there has to be a task - assert_true(task); - - // special case for completed tasks - if (task->isDone()) { - auto res = detail::done_task_to_treeture()(*task); - task->dependencyDone(); // remove one dependency for the lose of the owner - task = nullptr; - return res; - } - - // the referenced task has not been released yet - assert_eq(TaskBase::State::New,task->getState()); - - // create the resulting treeture - treeture res(*task); - - // start the task -- the actual release - task->start(); - - // reset the task pointer - task = nullptr; - - // return the resulting treeture - return res; - } - - operator treeture() && { - return std::move(*this).release(); - } - - T get() && { - return std::move(*this).release().get(); - } - - Task* toTask() && { - auto 
res = task; - task = nullptr; - return res; - } - - }; - - - - // --------------------------------------------------------------------------------------------- - // Operators - // --------------------------------------------------------------------------------------------- - - - - inline dependencies> after() { - return dependencies>(); - } - - template - auto after(const task_reference& r, const Rest& ... rest) { - return dependencies>(r,rest...); - } - - inline dependencies after(std::vector&& refs) { - return std::move(refs); - } - - - template - unreleased_treeture done(dependencies&& deps) { - auto res = new Task(true); - res->addDependencies(deps.begin(),deps.end()); - return res; - } - - inline unreleased_treeture done() { - return done(after()); - } - - template - unreleased_treeture done(dependencies&& deps, const T& value) { - auto res = new Task(value); - res->addDependencies(deps.begin(),deps.end()); - return res; - } - - template - unreleased_treeture done(const T& value) { - return done(after(),value); - } - - namespace runtime { - - // determines whether this thread is running in a nested context - bool isNestedContext(); - - } - - namespace detail { - - template - unreleased_treeture init(Deps&& deps, Task* task) { - - // add dependencies - task->addDependencies(deps.begin(),deps.end()); - - // create task family if requested - if (root) { - task->adopt(createFamily(!runtime::isNestedContext())); - } - - // done - return task; - } - - } - - - template> - unreleased_treeture spawn(dependencies&& deps, Action&& op) { - // create and initialize the task - return detail::init(std::move(deps), (Task*)(new SimpleTask(std::move(op)))); - } - - template - auto spawn(Action&& op) { - return spawn(after(),std::move(op)); - } - - template> - unreleased_treeture spawn(Deps&& deps, Action&& op, Split&& split) { - // create and initialize the task - return detail::init(std::move(deps), (Task*)(new SplitableTask(std::move(op),std::move(split)))); - } - - template 
- auto spawn(Action&& op, Split&& split) { - return spawn(after(),std::move(op),std::move(split)); - } - - template - unreleased_treeture seq(Deps&& deps) { - return done(std::move(deps)); - } - - inline unreleased_treeture seq() { - return done(); - } - - template - unreleased_treeture seq(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),false); - } - - template - unreleased_treeture seq(unreleased_treeture&& a, unreleased_treeture&& b) { - return seq(after(),std::move(a),std::move(b)); - } - - template - unreleased_treeture seq(dependencies&& deps, unreleased_treeture&& f, unreleased_treeture&& ... rest) { - // TODO: conduct a binary split to create a balanced tree - return make_split_task(std::move(deps),std::move(f).toTask(),seq(std::move(rest)...).toTask(),false); - } - - template - unreleased_treeture seq(unreleased_treeture&& f, unreleased_treeture&& ... rest) { - return seq(after(), std::move(f),std::move(rest)...); - } - - template - unreleased_treeture par(Deps&& deps) { - return done(std::move(deps)); - } - - inline unreleased_treeture par() { - return done(); - } - - template - unreleased_treeture par(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),true); - } - - template - unreleased_treeture par(unreleased_treeture&& a, unreleased_treeture&& b) { - return par(after(),std::move(a),std::move(b)); - } - - template - unreleased_treeture par(dependencies&& deps, unreleased_treeture&& f, unreleased_treeture&& ... rest) { - // TODO: conduct a binary split to create a balanced tree - return make_split_task(std::move(deps),std::move(f).toTask(),par(std::move(deps),std::move(rest)...).toTask(),true); - } - - template - unreleased_treeture par(unreleased_treeture&& f, unreleased_treeture&& ... 
rest) { - return par(after(), std::move(f),std::move(rest)...); - } - - - - template> - unreleased_treeture combine(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b, M&& m, bool parallel = true) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),std::move(m),parallel); - } - - template> - unreleased_treeture combine(unreleased_treeture&& a, unreleased_treeture&& b, M&& m, bool parallel = true) { - return reference::combine(after(),std::move(a),std::move(b),std::move(m),parallel); - } - - - // --------------------------------------------------------------------------------------------- - // Runtime - // --------------------------------------------------------------------------------------------- - - namespace runtime { - - - - // ----------------------------------------------------------------- - // Worker Pool - // ----------------------------------------------------------------- - - class Worker; - - thread_local static Worker* tl_worker = nullptr; - - static void setCurrentWorker(Worker& worker) { - tl_worker = &worker; - } - - static Worker& getCurrentWorker(); - - namespace detail { - - /** - * A utility to fix the affinity of the current thread to the given core. - * Does not do anything on operating systems other than linux. 
- */ - #ifdef __linux__ - inline void fixAffinity(int core) { - // fix affinity if user does not object - if(std::getenv("NO_AFFINITY") == nullptr) { - int num_cores = std::thread::hardware_concurrency(); - cpu_set_t mask; - CPU_ZERO(&mask); - CPU_SET(core % num_cores, &mask); - pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask); - } - } - #else - inline void fixAffinity(int) { } - #endif - - } - - class WorkerPool; - - - class Worker { - - using duration = RuntimePredictor::duration; - - // the targeted maximum queue length - // (more like a guideline, may be exceeded due to high demand) - enum { max_queue_length = 8 }; - - WorkerPool& pool; - - volatile bool alive; - - // list of tasks ready to run - OptimisticUnboundQueue queue; - - std::thread thread; - - unsigned id; - - // the list of workers to attempt to steel from, in order - std::vector stealingOrder; - - public: - - Worker(WorkerPool& pool, unsigned id) - : pool(pool), alive(true), id(id) { } - - Worker(const Worker&) = delete; - Worker(Worker&&) = delete; - - Worker& operator=(const Worker&) = delete; - Worker& operator=(Worker&&) = delete; - - void start() { - thread = std::thread([&](){ run(); }); - } - - void poison() { - alive = false; - } - - void join() { - thread.join(); - } - - void dumpState(std::ostream& out) const { - out << "Worker " << id << " / " << thread.get_id() << ":\n"; - out << "\tQueue:\n"; - for(const auto& cur : queue.getSnapshot()) { - out << "\t\t" << *cur << "\n"; - } - } - - private: - - void run(); - - void runTask(TaskBase& task); - - bool splitTask(TaskBase& task); - - duration estimateRuntime(const TaskBase& task) { - return task.getRuntimePredictor().predictTime(task.getDepth()); - } - - public: - - void schedule(TaskBase& task); - - bool schedule_step(); - - }; - - class WorkerPool { - - std::vector workers; - - // tools for managing idle threads - std::mutex m; - std::condition_variable cv; - - public: - - WorkerPool() { - - int numWorkers = 
std::thread::hardware_concurrency(); - - // parse environment variable - if (char* val = std::getenv("NUM_WORKERS")) { - auto userDef = std::atoi(val); - if (userDef != 0) numWorkers = userDef; - } - - // there must be at least one worker - if (numWorkers < 1) numWorkers = 1; - - // create workers - for(int i=0; istart(); - } - - // make worker 0 being linked to the main thread - setCurrentWorker(*workers.front()); - - // fix affinity of main thread - detail::fixAffinity(0); - - // fix worker id of main thread - setCurrentWorkerID(0); - - } - - ~WorkerPool() { - // shutdown threads - - { - // poison all workers - std::lock_guard guard(m); - for(auto& cur : workers) { - cur->poison(); - } - - // make work available - workAvailable(); - } - - // wait for their death - for(std::size_t i=1; ijoin(); - } - - // free resources - for(auto& cur : workers) { - delete cur; - } - - } - - static WorkerPool& getInstance() { - static WorkerPool pool; - return pool; - } - - int getNumWorkers() const { - return (int)workers.size(); - } - - private: - - mutable std::size_t initialLimit = std::numeric_limits::max(); - - public: - - std::size_t getInitialSplitDepthLimit() const { - if (initialLimit == std::numeric_limits::max()) { - std::size_t i = 0; - auto num_workers = getNumWorkers(); - while ((1<& getWorkers() const { - return workers; - } - - Worker& getWorker() { - return getWorker(0); - } - - void dumpState(std::ostream& out) { - for(const auto& cur : workers) { - cur->dumpState(out); - } - } - - protected: - - friend Worker; - - void waitForWork(volatile bool& alive) { - std::unique_lock lk(m); - if (!alive) return; - LOG_SCHEDULE("Going to sleep"); - cv.wait(lk); - LOG_SCHEDULE("Woken up again"); - } - - void workAvailable() { - // wake up all workers - cv.notify_all(); - } - - }; - - static Worker& getCurrentWorker() { - if (tl_worker) return *tl_worker; - return WorkerPool::getInstance().getWorker(); - } - - inline void Worker::run() { - - // fix worker ID - 
setCurrentWorkerID(id); - - // copy worker list - auto allWorkers = pool.getWorkers(); - - // a utility to add new steel targets - auto addStealTarget = [&](std::size_t idx) { - if (idx == id) return; - stealingOrder.push_back(allWorkers[idx]); - }; - - // create list of workers to steel from - auto numWorkers = allWorkers.size(); - for(std::size_t d=1; d 100000) { - - // report sleep event - logProfilerEvent(ProfileLogEntry::createWorkerSuspendedEntry()); - - // wait for work by putting thread to sleep - pool.waitForWork(alive); - - // report awakening - logProfilerEvent(ProfileLogEntry::createWorkerResumedEntry()); - - // reset cycles counter - idle_cycles = 0; - } - } - } - - // log worker termination event - logProfilerEvent(ProfileLogEntry::createWorkerDestroyedEntry()); - - // done - - } - - inline bool& getIsNestedFlag() { - static thread_local bool nested = false; - return nested; - } - - inline bool isNestedContext() { - return getIsNestedFlag(); - } - - inline void Worker::runTask(TaskBase& task) { - - // the splitting of a task may provide a done substitute => skip those - if (task.isDone()) return; - - LOG_SCHEDULE("Starting task " << task); - - // no substituted task may be processed - assert_false(task.isSubstituted()); - - // make sure this is a ready task - assert_eq(TaskBase::State::Ready,task.getState()); - - // mark as nested - bool& nestedContextFlag = getIsNestedFlag(); - bool old = nestedContextFlag; - nestedContextFlag = true; - - // process the task - if (task.isSplit()) { - task.run(); - } else { - - __allscale_unused auto taskId = task.getId(); - logProfilerEvent(ProfileLogEntry::createTaskStartedEntry(taskId)); - - // check whether this run needs to be sampled - auto level = task.getDepth(); - if (level == 0) { - - // level 0 does not need to be recorded (orphans) - task.run(); - - } else { - - // get predictor before task by be gone (as part of the processing) - RuntimePredictor& predictor = task.getRuntimePredictor(); - - // take the 
time to make predictions - auto start = RuntimePredictor::clock::now(); - task.run(); - auto time = RuntimePredictor::clock::now() - start; - - predictor.registerTime(level,time); - - } - - logProfilerEvent(ProfileLogEntry::createTaskEndedEntry(taskId)); - - } - - // reset old nested context state - nestedContextFlag = old; - - LOG_SCHEDULE("Finished task " << task); - } - - inline bool Worker::splitTask(TaskBase& task) { - using namespace std::chrono_literals; - - // the threshold for estimated task to be split - static const auto taskTimeThreshold = CycleCount(3*1000*1000); - - // only split the task if it is estimated to exceed a threshold - if (task.isSplitable() && (task.getDepth() == 0 || estimateRuntime(task) > taskTimeThreshold)) { - - // split this task - return task.split(); - - } - - // no split happend - return false; - } - - inline void Worker::schedule(TaskBase& task) { - - // assert that task has no unfinished dependencies - assert_true(task.isReady()); - - // no task that is substituted shall be scheduled - assert_false(task.isSubstituted()); - - - // actively distribute initial tasks, by assigning them to different workers - - // TODO: do the following only for top-level tasks!! - - if (!task.isOrphan() && task.getTaskFamily()->isTopLevel()) { - - // get the limit for initial decomposition - auto split_limit = pool.getInitialSplitDepthLimit(); - - // if below this limit, split the task - if (task.isSplitable() && task.getDepth() < split_limit) { - - // if splitting worked => we are done - if (task.split()) return; - - } - - // the depth limit for task being actively distributed - auto distribution_limit = split_limit + 2; - - // actively distribute tasks throughout the pool - if (task.getDepth() < distribution_limit) { - - // actively select the worker to issue the task to - std::size_t num_workers = pool.getNumWorkers(); - auto path = task.getTaskPath().getPath(); - auto depth = task.getDepth(); - - auto trgWorker = (depth==0) ? 
0 : (path * num_workers) / ((uint64_t)1 << depth); - - // check the computation of the target worker - assert_lt(trgWorker,(std::size_t)pool.getNumWorkers()) - << "Error in target worker computation:\n" - << "\tNumWorkers: " << num_workers << "\n" - << "\tPath: " << path << "\n" - << "\tDepth: " << depth << "\n" - << "\tTarget: " << trgWorker << "\n"; - - - // if the target is another worker => send the task there - if (trgWorker != id) { - - // submit this task to the selected worker - pool.getWorker((int)trgWorker).schedule(task); - - // done - return; - - } - } - } - - // add task to queue - LOG_SCHEDULE( "Queue size before: " << queue.size() ); - - // no task that is substituted shall be scheduled - assert_false(task.isSubstituted()); - - // add task to queue - queue.push_back(&task); - - // signal available work - pool.workAvailable(); - - // log new queue length - LOG_SCHEDULE( "Queue size after: " << queue.size() ); - - } - - - inline bool Worker::schedule_step() { - - // process a task from the local queue - if (TaskBase* t = queue.pop_front()) { - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // check precondition of task - assert_true(t->isReady()) << "Actual state: " << t->getState(); - - // if the queue is not full => create more tasks - if (queue.size() < (max_queue_length*3)/4) { - - LOG_SCHEDULE( "Splitting tasks @ queue size: " << queue.size() ); - - - - // split task and be done - if (splitTask(*t)) return true; - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - } - - // process this task - runTask(*t); - return true; - } - - // look through potential targets to steel a task - for(const auto& cur : stealingOrder) { - - // otherwise, steal a task from another worker - Worker& other = *cur; - - // try to steal a task from another queue - if (TaskBase* t = other.queue.try_pop_back()) { - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // log 
creation of worker event - logProfilerEvent(ProfileLogEntry::createTaskStolenEntry(t->getId())); - - LOG_SCHEDULE( "Stolen task: " << t ); - - // split task the task (since there is not enough work in the queue) - if (splitTask(*t)) return true; - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // process task - runTask(*t); - return true; // successfully completed a task - } - - } - - // no task found => wait a moment - cpu_relax(); - - // report back the failed steal attempt - return false; - } - - } - - namespace monitoring { - - inline std::ostream& operator<<(std::ostream& out, const Event& e) { - switch(e.type) { - case EventType::Run: return out << "Running task " << *e.task; - case EventType::RunDirect: return out << "Running direct task " << *e.task; - case EventType::Split: return out << "Splitting task " << *e.task; - case EventType::Wait: return out << "Waiting for task " << *e.task; - case EventType::DependencyWait: return out << "Waiting for dependency: " << e.taskId; - } - return out << "Unknown Event"; - } - - }// end namespace monitoring - - - inline void TaskBase::start() { - LOG_TASKS("Starting " << *this ); - - // check that the given task is a new task - assert_eq(TaskBase::State::New, state); - - // move to next state - setState(State::Blocked); - - // if below the initial split limit, split this task - if (!isOrphan() && getTaskFamily()->isTopLevel() && isSplitable() && getDepth() < runtime::WorkerPool::getInstance().getInitialSplitDepthLimit()) { - - // attempt to split this task - split(); - - } - - // release dummy-dependency to get task started - dependencyDone(); - } - - inline void TaskBase::dependencyDone() { - - // keep a backup in case the object is destroyed asynchronously - auto substitutedLocalCopy = substituted.load(); - - // decrease the number of active dependencies - int oldValue = num_active_dependencies.fetch_sub(1); - - // compute the new value - int newValue = oldValue - 1; - - // make 
sure there are no releases that should not be - assert_le(0,newValue); - - // if we are down to 0 => destroy this task - if (newValue == 0) { - - // at this point this task must be done - assert_eq(State::Done,state); - - // destroy this object, and be done - delete this; - return; - } - - // if the new value is not 1 => ignore - if (newValue != 1) return; - - // if the value is 1, we release this task for computation - assert_eq(1,newValue); - - // handle substituted instances by ignoring the message - if (substitutedLocalCopy || substituted) return; - - // make sure that at this point there is still a parent left - assert_eq(num_active_dependencies, 1); - - // at this point the state must not be new - assert_ne(State::New, state) - << "A new task must not reach a state where its last dependency is released."; - - // actually, every task here must be in blocked state - assert_eq(State::Blocked, state) << *this << "\t" << substitutedLocalCopy << "\n"; - - // update the state to ready - // (this can only be reached by one thread) - setState(State::Ready); - - // schedule task - runtime::getCurrentWorker().schedule(*this); - - } - - inline void TaskBase::wait() { - // log this event - // auto action = monitoring::log(monitoring::EventType::Wait, this); - - LOG_TASKS("Waiting for " << *this ); - - // check that this task has been started before - assert_lt(State::New,state); - - // wait until this task is finished - while(!isDone()) { - // make some progress - runtime::getCurrentWorker().schedule_step(); - } - } - - inline void task_reference::wait() const { - // log this event - // auto action = monitoring::log(monitoring::EventType::DependencyWait, TaskID(family->getId(),path)); - - // wait until the referenced task is done - while(!isDone()) { - // but while doing so, do useful stuff - runtime::getCurrentWorker().schedule_step(); - } - } - - namespace detail { - - template - void treeture_base::wait() const { - // wait for completion - while (promise && 
!promise->isReady()) { - // make some progress - runtime::getCurrentWorker().schedule_step(); - } - } - - } - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale - - -inline void __dumpRuntimeState() { - std::cout << "\n ------------------------- Runtime State Dump -------------------------\n"; - allscale::api::core::impl::reference::monitoring::ThreadState::dumpStates(std::cout); - allscale::api::core::impl::reference::runtime::WorkerPool::getInstance().dumpState(std::cout); - allscale::api::core::impl::reference::TaskBase::dumpAllTasks(std::cout); - std::cout << "\n ----------------------------------------------------------------------\n"; -} diff --git a/vendor/allscale/api/core/impl/sequential/treeture.h b/vendor/allscale/api/core/impl/sequential/treeture.h deleted file mode 100644 index c40e0ac4c..000000000 --- a/vendor/allscale/api/core/impl/sequential/treeture.h +++ /dev/null @@ -1,335 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/arrays.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace sequential { - - - // -------------------------------------------------------------------------------------------- - // sequential treeture implementation - // -------------------------------------------------------------------------------------------- - - - // ------------------------------------- Declarations ----------------------------------------- - - /** - * The actual treeture, referencing the computation of a value. - */ - template - class treeture; - - /** - * A treeture not yet released to the runtime system for execution. - */ - template - class unreleased_treeture; - - /** - * A handle for a lazily constructed unreleased treeture. 
This intermediate construct is utilized - * for writing templated code that can be optimized to overhead-less computed values and to facilitate - * the support of the sequence combinator. - */ - template - class lazy_unreleased_treeture; - - /** - * A class to reference tasks for synchronization purposes. - */ - class task_reference; - - /** - * A class to model task dependencies - */ - class dependencies; - - - // ------------------------------------- Definitions ------------------------------------------ - - // -- task_reference -- - - class task_reference { - - bool isDone() const { - return true; - } - - void wait() const { - // always done - } - - task_reference& descentLeft() { - return *this; - } - - task_reference& descentRight() { - return *this; - } - - task_reference getLeft() const { - return *this; - } - - task_reference getRight() const { - return *this; - } - - }; - - - // -- treeture -- - - template<> - class treeture : public task_reference { - public: - - using value_type = void; - - treeture() {} - - template - explicit treeture(Fun&& fun) { - fun(); - } - - template - treeture(const treeture& /*other*/) {} - - void get() const { - // nothing to do - } - - }; - - template - class treeture : public task_reference { - - T value; - - public: - - using value_type = T; - - using treeture_type = treeture; - - treeture() {} - - treeture(const T& value) - : value(value) {} - - treeture(const T&& value) - : value(std::move(value)) {} - - template - explicit treeture(Fun&& fun) - : value(fun()) {} - - T get() const { - return value; - } - - }; - - - template> - treeture make_treeture(Op&& op) { - return treeture(std::move(op)); - } - - // -- unreleased_treeture -- - - template - class unreleased_treeture : public task_reference { - - treeture res; - - public: - - using value_type = T; - - using treeture_type = treeture; - - unreleased_treeture() {} - - template - explicit unreleased_treeture(Fun&& fun) - : res(fun()) {} - - unreleased_treeture(const 
unreleased_treeture&) =delete; - unreleased_treeture(unreleased_treeture&&) =default; - - unreleased_treeture& operator=(const unreleased_treeture&) =delete; - unreleased_treeture& operator=(unreleased_treeture&&) =default; - - treeture release() const && { - return res; - } - - operator treeture() const && { - return std::move(*this).release(); - } - - T get() const && { - return std::move(*this).release().get(); - } - - }; - - template::value_type> - unreleased_treeture make_unreleased_treeture(Gen&& gen) { - return unreleased_treeture(std::move(gen)); - } - - template - class lazy_unreleased_treeture { - - mutable Gen gen; - - public: - - using value_type = T; - - using treeture_type = treeture; - - explicit lazy_unreleased_treeture(Gen&& gen) - : gen(std::move(gen)) {} - - unreleased_treeture toUnreleasedTreeture() const { - return gen(); - } - - treeture release() const { - return toUnreleasedTreeture(); - } - - T get() const { - return release().get(); - } - - operator unreleased_treeture() const { - return toUnreleasedTreeture(); - } - - operator treeture() const { - return release(); - } - - }; - - template::value_type> - lazy_unreleased_treeture make_lazy_unreleased_treeture(Gen&& gen) { - return lazy_unreleased_treeture(std::move(gen)); - } - - /** - * There are no dependencies to be recorded, so this object is an empty object. - */ - class dependencies {}; - - - // -------------------------------------- Operators ------------------------------------------- - - - inline dependencies after() { - return {}; - } - - template - dependencies after(const task_reference&, const Rest& ... 
rest) { - return after(rest...); - } - - inline dependencies after(const std::vector&) { - return {}; // if it is a task_reference, it is computed - } - - - inline auto done() { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return treeture(); }); - }); - } - - template - auto done(const T& value) { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return treeture(value); }); - }); - } - - - template - auto spawn(dependencies&&, Op&& op) { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return make_treeture(std::move(op)); }); - }); - } - - template - auto spawn(Op&& op) { - return spawn(after(),std::move(op)); - } - - - inline auto seq() { - return done(); - } - - template - auto seq(dependencies&&, lazy_unreleased_treeture&& f, lazy_unreleased_treeture&& ... rest) { - return make_lazy_unreleased_treeture([f,rest...]() mutable { - return make_unreleased_treeture([f,rest...]() mutable { - return make_treeture([f,rest...]() mutable { - f.get(); - seq(std::move(rest)...).get(); - }); - }); - }); - } - - template - auto seq(lazy_unreleased_treeture&& f, lazy_unreleased_treeture&& ... rest) { - return seq(after(), std::move(f),std::move(rest)...); - } - - template - auto par(dependencies&&, lazy_unreleased_treeture&& ... tasks) { - // for the sequential implementation, parallel is the same as sequential - return seq(std::move(tasks)...); - } - - template - auto par(lazy_unreleased_treeture&& ... 
tasks) { - return par(after(), std::move(tasks)...); - } - - - template - auto combine(dependencies&&, lazy_unreleased_treeture&& a, lazy_unreleased_treeture&& b, M&& m, bool = true) { - return make_lazy_unreleased_treeture([=]() { - return make_unreleased_treeture([=]() { - return make_treeture([=]() { - return m(a.get(),b.get()); - }); - }); - }); - } - - template - auto combine(lazy_unreleased_treeture&& a, lazy_unreleased_treeture&& b, M&& m, bool parallel = true) { - return sequential::combine(after(), std::move(a), std::move(b), std::move(m), parallel); - } - - -} // end namespace sequential -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/core/io.h b/vendor/allscale/api/core/io.h deleted file mode 100644 index 90727ac38..000000000 --- a/vendor/allscale/api/core/io.h +++ /dev/null @@ -1,575 +0,0 @@ -#pragma once - -#include - -#include "allscale/api/core/impl/reference/io.h" -#include "allscale/utils/serializer.h" - -namespace allscale { -namespace api { -namespace core { - - - // ---------------------------------------------------------------------- - // Declarations - // ---------------------------------------------------------------------- - - - /** - * Supported IO modes for stream based operations. - * @see http://en.cppreference.com/w/cpp/io/c#Binary_and_text_modes - */ - enum class Mode { - Text, Binary - }; - - /** - * An abstraction for a file or buffer to read/write from. - */ - class Entry; - - /** - * An out-of-order stream for reading information from a file/buffer previously - * written using an output stream. - */ - class InputStream; - - /** - * An out-of-order stream for writing information to some file/buffer. - */ - class OutputStream; - - /** - * A utility for reading the content of a storage entity (e.g. a file) through - * memory mapped IO. 
- */ - class MemoryMappedInput; - - /** - * A utility for reading and writing the content of a storage entity (e.g. a file) through - * memory mapped IO. - */ - class MemoryMappedOutput; - - /** - * An IO manager for in-memory data buffer manipulations. - */ - class BufferIOManager; - - /** - * An IO manager providing access to the file system. - */ - class FileIOManager; - - - - // ---------------------------------------------------------------------- - // Definitions - // ---------------------------------------------------------------------- - - - // -- Stream Based IO --------------------------------------------------- - - /** - * A converter between this interface and the reference implementation - */ - inline impl::reference::Mode toRefMode(Mode mode) { - switch(mode) { - case Mode::Text: return impl::reference::Mode::Text; - case Mode::Binary: return impl::reference::Mode::Binary; - } - assert_fail() << "Invalid mode encountered!"; - return {}; - } - - class Entry { - - friend InputStream; - - friend OutputStream; - - friend MemoryMappedInput; - - friend MemoryMappedOutput; - - template - friend class IOManager; - - using RefEntry = impl::reference::Entry; - - // the wrapped up reference implementation - RefEntry entry; - - // the constructor is private to restrict creation to the corresponding factories - Entry(const RefEntry& entry) : entry(entry) {} - - }; - - - /** - * A stream to read data from some entry of an IO manager. - */ - class InputStream { - - template - friend class IOManager; - - using RefInStream = impl::reference::InputStream; - - // the wrapped up reference implementation - RefInStream& istream; - - InputStream(RefInStream& istream) - : istream(istream) {} - - public: - - InputStream(const InputStream&) = delete; - InputStream(InputStream&&) = default; - - /** - * Obtains the entry this stream is associated to. 
- */ - Entry getEntry() const { - return istream.getEntry(); - } - - /** - * Provides atomic access to this stream, allowing the given body to - * to perform a sequence of read operations without potential interference - * of other threads. - */ - template - InputStream& atomic(const Body& body) { - istream.atomic(body); - return *this; - } - - /** - * Reads a single instance of the given type (atomic). - */ - template - T read() { - return istream.read(); - } - - /** - * An idiomatic overload of the read operation. - */ - template - InputStream& operator>>(T& trg) { - istream >> trg; - return *this; - } - - /** - * Allows to test whether this stream is in a valid state. It can, for instance, - * be utilized to determine whether there has been an error during the last - * performed operation or whether in text mode the end of a file has been reached. - */ - operator bool() const { - return istream; - } - - // -- make it serializable -- - - static InputStream load(utils::ArchiveReader& a) { - return { RefInStream::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - istream.store(a); - } - }; - - - - /** - * A stream to write data to some entry of an IO manager. - */ - class OutputStream { - - template - friend class IOManager; - - using RefOutStream = impl::reference::OutputStream; - - RefOutStream& ostream; - - OutputStream(RefOutStream& ostream) - : ostream(ostream) {} - - public: - - OutputStream(const OutputStream&) = delete; - OutputStream(OutputStream&&) = default; - - /** - * Obtains the entry this stream is associated to. - */ - Entry getEntry() const { - return ostream.getEntry(); - } - - /** - * Provides atomic access to this stream, allowing the given body to - * to perform a sequence of write operations without potential interference - * of other threads. - */ - template - OutputStream& atomic(const Body& body) { - ostream.atomic(body); - return *this; - } - - /** - * Writes a single instance of the given type (atomic). 
- */ - template - OutputStream& write(const T& value) { - ostream.write(value); - return *this; - } - - /** - * An idiomatic overload of the write operation. - */ - template - OutputStream& operator<<(const T& value) { - ostream << value; - return *this; - } - OutputStream& operator<<(const char* value) { - ostream << value; - return *this; - } - - /** - * Allows to test whether this stream is in a valid state. It can, for instance, - * be utilized to determine whether there has been an error during the last - * performed operation. - */ - operator bool() const { - return ostream; - } - - // -- make it serializable -- - - static OutputStream load(utils::ArchiveReader& a) { - return { RefOutStream::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - ostream.store(a); - } - }; - - - - - - // -- Memory Mapped IO -------------------------------------------------- - - - /** - * A utility for reading the content of a storage entity (e.g. a file) through - * memory mapped IO. - */ - class MemoryMappedInput { - - template - friend class IOManager; - - using Impl = impl::reference::MemoryMappedInput; - - Impl impl; - - MemoryMappedInput(Impl&& impl) : impl(impl) {} - - public: - - /** - * The identifier for the underlying storage entity. - */ - Entry getEntry() const { - return impl.getEntry(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an instance of type T. - */ - template - const T& access() const { - return impl.access(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an array of instances of type T. - */ - template - const T* accessArray() const { - return &access(); - } - - // -- make it serializable -- - - static MemoryMappedInput load(utils::ArchiveReader& a) { - return { Impl::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - impl.store(a); - } - }; - - /** - * A utility for reading and writing the content of a storage entity (e.g. 
a file) through - * memory mapped IO. - */ - class MemoryMappedOutput { - - template - friend class IOManager; - - using Impl = impl::reference::MemoryMappedOutput; - - Impl impl; - - MemoryMappedOutput(Impl&& impl) : impl(impl) {} - - public: - - /** - * The identifier for the underlying storage entity. - */ - Entry getEntry() const { - return impl.getEntry(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an instance of type T. - */ - template - T& access() const { - return impl.access(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an array of instances of type T. - */ - template - T* accessArray() const { - return &access(); - } - - // -- make it serializable -- - - static MemoryMappedOutput load(utils::ArchiveReader& a) { - return { Impl::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - impl.store(a); - } - }; - - - - // -- IO Manager -------------------------------------------------------- - - /** - * An IO manager, as the central dispatcher for IO operations. - */ - template - class IOManager { - - using Impl = impl::reference::IOManager; - - Impl impl; - - public: - - /** - * Creates a new entry with the given name in the underlying storage system. - * - * @param name the name of the entry (e.g. file) - * @param mode whether it is a binary or text file - * @return a entry ID referencing the newly created resource - */ - Entry createEntry(const std::string& name, Mode mode = Mode::Text) { - return impl.createEntry(name, toRefMode(mode)); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! 
- * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - InputStream openInputStream(Entry entry) { - return InputStream(impl.openInputStream(entry.entry)); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - OutputStream openOutputStream(Entry entry) { - return OutputStream(impl.openOutputStream(entry.entry)); - } - - /** - * Register a new memory mapped input with the given name within the system. - * The call will load the underlying storage and prepare input operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedInput openMemoryMappedInput(Entry entry) { - return MemoryMappedInput(impl.openMemoryMappedInput(entry.entry)); - } - - /** - * Register a new memory mapped output with the given name within the system. - * The call will create the underlying storage and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedOutput openMemoryMappedOutput(Entry entry, std::size_t size) { - return MemoryMappedOutput(impl.openMemoryMappedOutput(entry.entry,size)); - } - - /** - * Obtains an input stream to read data from a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. 
- * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - InputStream getInputStream(Entry entry) { - return InputStream(impl.getInputStream(entry.entry)); - } - - /** - * Obtains an output stream to write data to a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - OutputStream getOutputStream(Entry entry) { - return OutputStream(impl.getOutputStream(entry.entry)); - } - - /** - * Obtains a memory mapped input to read data from a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * input is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped input - */ - MemoryMappedInput getMemoryMappedInput(Entry entry) { - return MemoryMappedInput(impl.getMemoryMappedInput(entry)); - } - - /** - * Obtains a memory mapped output to write data to a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * output is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped output - */ - MemoryMappedOutput getMemoryMappedOutput(Entry entry, std::size_t size) { - return MemoryMappedOutput(impl.getMemoryMappedOutput(entry,size)); - } - - /** - * Closes the given stream. - */ - void close(const InputStream& in) { - impl.close(in.istream); - } - - /** - * Closes the given stream. - */ - void close(const OutputStream& out) { - impl.close(out.ostream); - } - - /** - * Closes the given memory mapped entry. 
- */ - void close(const MemoryMappedInput& in) { - impl.close(in.impl); - } - - /** - * Closes the given memory mapped entry. - */ - void close(const MemoryMappedOutput& out) { - impl.close(out.impl); - } - - /** - * Determines whether the given entry exists. - */ - bool exists(Entry entry) const { - return impl.exists(entry.entry); - } - - /** - * Deletes the entry with the given name. - */ - void remove(Entry entry) { - impl.remove(entry.entry); - } - - }; - - // Definition of the BufferIOManager - class BufferIOManager : public IOManager { - - }; - - // Definition of the FileIOManager - class FileIOManager : public IOManager { - - /** - * Make constructor private to avoid instances. - */ - FileIOManager() {} - - public: - - /** - * Provide access to the singleton instance. - */ - static FileIOManager& getInstance() { - static FileIOManager mgr; - return mgr; - } - - }; - -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/core/prec.h b/vendor/allscale/api/core/prec.h deleted file mode 100644 index d4ce84c57..000000000 --- a/vendor/allscale/api/core/prec.h +++ /dev/null @@ -1,486 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/functional_utils.h" -#include "allscale/utils/vector_utils.h" - -#include "allscale/api/core/treeture.h" - -namespace allscale { -namespace api { -namespace core { - - // ----- fun variants + utils ---------- - - template - class fun_variants : public std::tuple { - public: - explicit fun_variants(const Types&... args) : std::tuple(args...) { } - }; - - template - inline fun_variants make_fun_variants(const Types& ... 
elements) { - return fun_variants(elements...); - } - - template - struct is_fun_variants : public std::false_type {}; - - template - struct is_fun_variants> : public std::true_type {}; - - template - struct is_fun_variants : public is_fun_variants {}; - - template - struct is_fun_variants : public is_fun_variants {}; - - - - - namespace detail { - - template - struct result_wrapper { - - template - Out operator()(Fun&& fun) { - return fun(); - } - - }; - - template - struct result_wrapper,T> { - - template - completed_task operator()(Fun&& fun) { - return done(fun()); - } - - }; - - template<> - struct result_wrapper,void> { - - template - completed_task operator()(Fun&& fun) { - fun(); - return done(); - } - - }; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - - struct call_first { - - template< - typename Res, - typename ... Versions, - typename ... Args - > - Res call(const fun_variants& versions, const Args& ... args) { - using res_type = decltype(std::get<0>(versions)(args...)); - result_wrapper wrap; - return wrap([&](){ return std::get<0>(versions)(args...); }); - } - - }; - - struct call_last { - - template< - typename Res, - typename ... Versions, - typename ... Args - > - Res call(const fun_variants& versions, const Args& ... args) { - using res_type = decltype(std::get(versions)(args...)); - result_wrapper wrap; - return wrap([&](){ return std::get(versions)(args...); }); - } - - }; - - } // end namespace detail - - - // ----- option handling handling ---------- - - - template - fun_variants pick(Options&& ... 
options) { - return make_fun_variants(std::move(options)...); - } - - - // ----- function handling ---------- - - template< - typename O, - typename I, - typename BaseCaseTest, - typename BaseCases, - typename StepCases - > - struct fun_def; - - template< - typename O, - typename I, - typename BaseCaseTest, - typename ... BaseCases, - typename ... StepCases - > - struct fun_def,fun_variants> { - typedef I in_type; - typedef O out_type; - - BaseCaseTest bc_test; - fun_variants base; - fun_variants step; - - fun_def( - const BaseCaseTest& test, - const fun_variants& base, - const fun_variants& step - ) : bc_test(test), base(base), step(step) {} - - fun_def(const fun_def& other) = default; - fun_def(fun_def&& other) = default; - - fun_def& operator=(const fun_def&) = delete; - fun_def& operator=(fun_def&&) = delete; - - template - impl::sequential::unreleased_treeture sequentialCall(impl::sequential::dependencies&& deps, const I& in, const Funs& ... funs) const { - // check for the base case, producing a value to be wrapped - if (bc_test(in)) { - return impl::sequential::spawn(std::move(deps),[&]{ return detail::call_first().template call(base, in); }); - } - - // run sequential step case producing an immediate value - return detail::call_last().template call>(step, in, funs.sequential_call()...); - } - - - template - impl::reference::unreleased_treeture parallelCall(impl::reference::dependencies&& deps, const I& in, const Funs& ... 
funs) const { - // check for the base case - const auto& base = this->base; - if (bc_test(in)) { - return impl::reference::spawn(std::move(deps), [=] { - return detail::call_first().template call(base, in); - }); - } - - // run step case - const auto& step = this->step; - return impl::reference::spawn( - // the dependencies of the new task - std::move(deps), - // the process version (sequential): - [=] { return detail::call_last().template call>(step, in, funs.sequential_call()...).get(); }, - // the split version (parallel): - [=] { return detail::call_first().template call>(step, in, funs.parallel_call()...); } - ); - } - - }; - - - - namespace detail { - - template - struct is_fun_def : public std::false_type {}; - - template - struct is_fun_def> : public std::true_type {}; - - template - struct is_fun_def : public is_fun_def {}; - - template - struct is_fun_def : public is_fun_def {}; - - template - struct is_fun_def : public is_fun_def {}; - - } - - template< - typename BT, typename First_BC, typename ... BC, typename ... 
SC, - typename O = typename utils::lambda_traits::result_type, - typename I = typename utils::lambda_traits::arg1_type - > - fun_def,fun_variants> - fun(const BT& a, const fun_variants& b, const fun_variants& c) { - return fun_def,fun_variants>(a,b,c); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && !is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,make_fun_variants(b),make_fun_variants(c))) { - return fun(a,make_fun_variants(b),make_fun_variants(c)); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,make_fun_variants(b),c)) { - return fun(a,make_fun_variants(b),c); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && !is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,b,make_fun_variants(c))) { - return fun(a,b,make_fun_variants(c)); - } - - - // --- recursive definitions --- - - template struct rec_defs; - - - namespace detail { - - - template< - unsigned i, - typename ... 
Defs - > - struct callable { - - using I = typename utils::type_at>::type::in_type; - using O = typename utils::type_at>::type::out_type; - - rec_defs defs; - - callable(const rec_defs& defs) : defs(defs) {}; - - struct SequentialCallable { - rec_defs defs; - - auto operator()(impl::sequential::dependencies&& deps, const I& in) const { - return impl::sequential::make_lazy_unreleased_treeture([=]() mutable { - return defs.template sequentialCall(std::move(deps),in); - }); - } - - auto operator()(const I& in) const { - return impl::sequential::make_lazy_unreleased_treeture([=](){ - return defs.template sequentialCall(impl::sequential::dependencies(),in); - }); - } - - }; - - auto sequential_call() const { - return SequentialCallable{defs}; - } - - - struct ParallelCallable { - rec_defs defs; - - template - auto operator()(impl::reference::dependencies&& deps, const I& in) const { - return defs.template parallelCall(std::move(deps),in); - } - - auto operator()(core::no_dependencies&&, const I& in) const { - return defs.template parallelCall(impl::reference::after(),in); - } - - auto operator()(const I& in) const { - return operator()(after(), in); - } - - }; - - auto parallel_call() const { - return ParallelCallable{defs}; - } - }; - - template< - unsigned i, - typename ... Defs - > - callable createCallable(const rec_defs& defs) { - return callable(defs); - } - - template - struct caller { - template - impl::sequential::unreleased_treeture sequentialCall(const F& f, impl::sequential::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return caller().template sequentialCall(f,std::move(deps),i,d,createCallable(d),args...); - } - template - impl::reference::unreleased_treeture parallelCall(const F& f, impl::reference::dependencies&& deps, const I& i, const D& d, const Args& ... 
args) const { - return caller().template parallelCall(f,std::move(deps),i,d,createCallable(d),args...); - } - }; - - template<> - struct caller<0> { - template - auto sequentialCall(const F& f, impl::sequential::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return f.sequentialCall(std::move(deps),i,createCallable<0>(d),args...); - } - template - impl::reference::unreleased_treeture parallelCall(const F& f, impl::reference::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return f.template parallelCall(std::move(deps),i,createCallable<0>(d),args...); - } - }; - - - template - struct is_rec_def : public std::false_type {}; - - template - struct is_rec_def> : public std::true_type {}; - - template - struct is_rec_def : public is_rec_def {}; - - template - struct is_rec_def : public is_rec_def {}; - - template - struct is_rec_def : public is_rec_def {}; - - } - - - template - struct rec_defs : public std::tuple { - - template - rec_defs(const Args& ... args) : std::tuple(args...) {} - - rec_defs(const rec_defs&) = default; - rec_defs(rec_defs&&) = default; - - rec_defs& operator=(const rec_defs&) = delete; - rec_defs& operator=(rec_defs&&) = delete; - - template< - unsigned i, - typename O, - typename I - > - impl::sequential::unreleased_treeture sequentialCall(impl::sequential::dependencies&& deps, const I& in) const { - // call target function with a spawn - return detail::caller().template sequentialCall(std::get(*this),std::move(deps),in,*this); - } - - template< - bool root, - unsigned i, - typename O, - typename I, - typename DepsKind - > - impl::reference::unreleased_treeture parallelCall(impl::reference::dependencies&& deps, const I& in) const { - // call target function with a spawn - return detail::caller().template parallelCall(std::get(*this),std::move(deps),in,*this); - } - - }; - - - namespace detail { - - /** - * The struct forming the callable created by the prec operator. 
- */ - template< - unsigned i, - typename I, - typename O, - typename ... Defs - > - struct prec_operation { - - rec_defs defs; - - template - treeture operator()(impl::reference::dependencies&& deps, const I& in) { - return defs.template parallelCall(std::move(deps),in); - } - - treeture operator()(core::no_dependencies&&, const I& in) { - return defs.template parallelCall(impl::reference::after(),in); - } - - treeture operator()(const I& in) { - return (*this)(after(),in); - } - }; - - - } - - - template< - typename ... Defs - > - rec_defs group(const Defs& ... defs) { - return rec_defs(defs...); - } - - - // --- prec operator --- - - template< - unsigned i = 0, - typename ... Defs, - typename I = typename utils::type_at>::type::in_type, - typename O = typename utils::type_at>::type::out_type - > - auto prec(const rec_defs& defs) { - return detail::prec_operation{defs}; - } - - template< - unsigned i = 0, - typename First, - typename ... Rest, - typename dummy = typename std::enable_if::value,int>::type - > - auto prec(const First& f, const Rest& ... r) { - return prec(group(f,r...)); - } - - template< - typename BT, typename BC, typename SC, - typename dummy = typename std::enable_if::value,int>::type - > - auto prec(const BT& t, const BC& b, const SC& s) { - return prec<0>(group(fun(t,b,s))); - } - -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/treeture.h b/vendor/allscale/api/core/treeture.h deleted file mode 100644 index bfac6da2d..000000000 --- a/vendor/allscale/api/core/treeture.h +++ /dev/null @@ -1,430 +0,0 @@ -#pragma once - -#include - -/** - * This header file formalizes the general, public interface of treetures, independent - * of any actual implementation. - * - * TODO: extend on this here ... 
- */ - -#include "allscale/api/core/impl/sequential/treeture.h" -#include "allscale/api/core/impl/reference/treeture.h" - -namespace allscale { -namespace api { -namespace core { - - - // -------------------------------------------------------------------------------------------- - // Treetures - // -------------------------------------------------------------------------------------------- - - - /** - * The actual treeture, referencing the computation of a value. - */ - template - using treeture = impl::reference::treeture; - - /** - * A reference to a sub-task, to create - */ - using task_reference = impl::reference::task_reference; - - - // --------------------------------------------------------------------------------------------- - // Auxiliary Construct - // --------------------------------------------------------------------------------------------- - - - namespace detail { - - template - struct completed_task { - - using value_type = T; - - T value; - - operator impl::sequential::unreleased_treeture() { - return impl::sequential::done(value); - } - - operator impl::reference::unreleased_treeture() { - return impl::reference::done(value); - } - - operator impl::sequential::treeture() { - return impl::sequential::done(value); - } - - operator impl::reference::treeture() { - return impl::reference::done(value); - } - - T get() { - return value; - } - - }; - - template<> - struct completed_task { - - using value_type = void; - - operator impl::sequential::unreleased_treeture() { - return impl::sequential::done(); - } - - operator impl::reference::unreleased_treeture() { - return impl::reference::done(); - } - - operator impl::sequential::treeture() { - return impl::sequential::done(); - } - - operator impl::reference::treeture() { - return impl::reference::done(); - } - - void get() { - } - - }; - - } - - - // --------------------------------------------------------------------------------------------- - // Operators - // 
--------------------------------------------------------------------------------------------- - - // --- dependencies --- - - class no_dependencies { - - public: - - operator impl::sequential::dependencies() const { - return impl::sequential::after(); - } - - operator impl::reference::dependencies>() const { - return impl::reference::after(); - } - - operator impl::reference::dependencies() const { - return impl::reference::after(std::vector()); - } - - }; - - // --- utility to identify dependencies --- - - template - struct is_dependency : public std::false_type {}; - - template<> - struct is_dependency : public std::true_type {}; - - template<> - struct is_dependency : public std::true_type {}; - - template - struct is_dependency> : public std::true_type {}; - - - // -- no dependencies -- - - inline auto after() { - return no_dependencies(); - } - - - // -- sequential -- - - template - auto after(const impl::sequential::task_reference& first, const Rest& ... rest) { - return impl::sequential::after(first, rest...); - } - - - // -- reference -- - - template - auto after(const impl::reference::task_reference& first, const Rest& ... 
rest) { - return impl::reference::after(first, rest...); - } - - - // --- releasing tasks --- - - template - inline impl::sequential::treeture run(impl::sequential::unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - template - inline impl::sequential::treeture run(impl::sequential::lazy_unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - template - inline impl::reference::treeture run(impl::reference::unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - - // --- completed tasks --- - - inline detail::completed_task done() { - return detail::completed_task(); - } - - template - detail::completed_task done(const T& value) { - return detail::completed_task{value}; - } - - - // --- control flow --- - - - namespace detail { - - /** - * Different implementations utilized by this reference implementation. - */ - - struct DoneImpl { - - template - auto convertParameter(completed_task&& a) const { - return std::move(a); - } - - template - auto sequential(completed_task&&,completed_task&&) { - return done(); - } - - template - auto sequential(const D&, completed_task&&,completed_task&&) { - return done(); - } - - template - auto parallel(completed_task&&,completed_task&&) { - return done(); - } - - template - auto parallel(const D&, completed_task&&,completed_task&&) { - return done(); - } - - template - auto combine(completed_task&& a, completed_task&& b, M&& m, bool) { - return done(m(a.get(),b.get())); - } - - template - auto combine(const D&, completed_task&& a, completed_task&& b, M&& m, bool) { - return done(m(a.get(),b.get())); - } - - }; - - struct SequentialImpl { - - template - auto convertParameter(completed_task&& a) const { - return impl::sequential::done(a.get()); - } - - template - auto convertParameter(impl::sequential::lazy_unreleased_treeture&& a) const { - return std::move(a); - } - - template - auto sequential(impl::sequential::lazy_unreleased_treeture&& a, 
impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::seq(std::move(a),std::move(b)); - } - - template - auto sequential(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::seq(std::move(deps),std::move(a),std::move(b)); - } - - template - auto parallel(impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::par(std::move(a),std::move(b)); - } - - template - auto parallel(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::par(std::move(deps),std::move(a),std::move(b)); - } - - template - auto combine(impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b, M&& m, bool parallel) { - return impl::sequential::combine(std::move(a),std::move(b),std::move(m), parallel); - } - - template - auto combine(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b, M&& m, bool parallel) { - return impl::sequential::combine(std::move(deps),std::move(a),std::move(b),std::move(m), parallel); - } - }; - - struct ReferenceImpl { - - template - auto convertParameter(completed_task&& a) const { - return impl::reference::done(a.get()); - } - - template - auto convertParameter(impl::reference::unreleased_treeture&& a) const { - return std::move(a); - } - - template - auto sequential(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::seq(std::move(a),std::move(b)); - } - - template - auto sequential(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::seq(std::move(deps),std::move(a),std::move(b)); - } - - 
template - auto parallel(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::par(std::move(a),std::move(b)); - } - - template - auto parallel(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::par(std::move(deps),std::move(a),std::move(b)); - } - - template - auto combine(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b, M&& m, bool parallel) { - return impl::reference::combine(std::move(a),std::move(b),std::move(m), parallel); - } - - template - auto combine(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b, M&& m, bool parallel) { - return impl::reference::combine(std::move(deps),std::move(a),std::move(b),std::move(m), parallel); - } - }; - - - /** - * A mapping of parameter combinations to implementations: - * - * done, done -> done - * - * seq , seq -> seq - * seq , done -> seq - * done, seq -> seq - * - * ref , ref -> ref - * ref , done -> ref - * done, ref -> ref - * - * others are illegal - */ - - template - struct implementation; - - template - struct implementation,completed_task> : public DoneImpl {}; - - template - struct implementation,impl::sequential::lazy_unreleased_treeture> : public SequentialImpl {}; - - template - struct implementation,completed_task> : public SequentialImpl {}; - - template - struct implementation,impl::sequential::lazy_unreleased_treeture> : public SequentialImpl {}; - - template - struct implementation,impl::reference::unreleased_treeture> : public ReferenceImpl {}; - - template - struct implementation,completed_task> : public ReferenceImpl {}; - - template - struct implementation,impl::reference::unreleased_treeture> : public ReferenceImpl {}; - - } - - - // -- sequential -- - - template::value,int>::type = 1> - auto sequential(D&& deps, A&& a, B&& b) { - 
detail::implementation impl; - return impl.sequential(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto sequential(A&& a, B&& b) { - detail::implementation impl; - return impl.sequential(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto sequential(A&& a, B&& b, Rest&& ... rest) { - return sequential(sequential(std::move(a),std::move(b)),std::move(rest)...); - } - - - // -- parallel -- - - template::value,int>::type = 1> - auto parallel(D&& deps, A&& a, B&& b) { - detail::implementation impl; - return impl.parallel(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto parallel(A&& a, B&& b) { - detail::implementation impl; - return impl.parallel(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto parallel(A&& a, B&& b, Rest&& ... rest) { - return parallel(parallel(std::move(a),std::move(b)),std::move(rest)...); - } - - // --- aggregation --- - - - template - auto combine(D&& deps, A&& a, B&& b, M&& m, bool parallel = true) { - detail::implementation impl; - return impl.combine(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b)), std::move(m), parallel); - } - - template - auto combine(A&& a, B&& b, M&& m, bool parallel = true) { - detail::implementation impl; - return impl.combine(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b)), std::move(m), parallel); - } - - -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/user/algorithm/async.h b/vendor/allscale/api/user/algorithm/async.h deleted file mode 100644 index 51d1c502a..000000000 --- a/vendor/allscale/api/user/algorithm/async.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" - -#include "allscale/api/core/prec.h" - -namespace allscale { 
-namespace api { -namespace user { -namespace algorithm { - - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - /** - * A simple job wrapper processing a given task asynchronously. The task - * is wrapped to a simple recursion where there is a single base - * case step. - * - * @tparam Action the type of action - * @param action the action to be processed - * @return a treeture providing a reference the the result - */ - template - core::treeture> async(const Action& action); - - - /** - * A simple job wrapper processing a given task asynchronously after the - * given dependencies are satisfied. The task is wrapped to a simple recursion - * where there is a single base case step. - * - * @tparam Dependencies the dependencies to await - * @tparam Action the type of action - * @param action the action to be processed - * @return a treeture providing a reference the the result - */ - template - core::treeture> async(Dependencies&& deps, const Action& action); - - - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - core::treeture> async(const Action& action) { - return async(core::after(), action); - } - - - template - core::treeture> async(Dependencies&& deps, const Action& action) { - struct empty {}; - return core::prec( - [](empty){ return true; }, - [=](empty){ - return action(); - }, - [=](empty,const auto&){ - assert_fail() << "Should not be reached!"; - return action(); - } - )(std::move(deps), empty()); - } - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/internal/operation_reference.h 
b/vendor/allscale/api/user/algorithm/internal/operation_reference.h deleted file mode 100644 index 1e754f534..000000000 --- a/vendor/allscale/api/user/algorithm/internal/operation_reference.h +++ /dev/null @@ -1,114 +0,0 @@ -#include - -#include "allscale/api/core/treeture.h" - -#include "allscale/utils/assert.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { -namespace internal { - - - /** - * An operation reference is an (optional) base implementation - * of the return values of asynchronous operations. Unlike plain - * treetures, operator references are waiting for their tasks - * to be completed before destruction. - */ - class operation_reference { - - /** - * The treeture wrapped by this references, which corresponds - * to the root task of the asynchronously started task. - */ - core::treeture handle; - - public: - - /** - * A simple constructor taking 'ownership' on the given treeture. - */ - operation_reference(core::treeture&& handle) - : handle(std::move(handle)) {} - - /** - * A simple constructor taking 'ownership' on the given completed task. - */ - operation_reference(core::detail::completed_task&&) - : handle() {} - - /** - * A default constructor, not owning or syncing on anything. - */ - operation_reference() {}; - - /** - * Operation references may not be copied. - */ - operation_reference(const operation_reference&) = delete; - - /** - * Operation references may be moved. - */ - operation_reference(operation_reference&&) = default; - - /** - * Operation references may not be copied. - */ - operation_reference& operator=(const operation_reference&) = delete; - - /** - * Operation references may be moved. - */ - operation_reference& operator=(operation_reference&&) = default; - - /** - * Upon destruction, the references is waiting on the underlying - * task if it is still owned. 
- */ - ~operation_reference() { - // if handle is still valid, wait for its completion - if (handle.isValid()) handle.wait(); - } - - /** - * A non-blocking check whether the referenced operation is done. - */ - bool isDone() const { - return handle.isDone(); - } - - /** - * Determines whether a task is attached to this reference. - */ - bool isValid() const { - return handle.isValid(); - } - - /** - * Disconnects the referenced task, causing this reference no longer - * to wait on the given task upon destruction. - * - * @return returns the maintained task handle - */ - core::treeture detach() { - return std::move(handle); - } - - /** - * Blocks until the underlying operation has been completed. - */ - void wait() { - handle.wait(); - } - - }; - - -} // end namespace internal -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/pfor.h b/vendor/allscale/api/user/algorithm/pfor.h deleted file mode 100644 index 881f676b2..000000000 --- a/vendor/allscale/api/user/algorithm/pfor.h +++ /dev/null @@ -1,1758 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" - -#include "allscale/api/core/prec.h" - -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - // ----- forward declarations ------ - - namespace detail { - - /** - * The object representing the iterator range of a (parallel) loop. - */ - template - class range; - - - // -- Adaptive Loop Dependencies -- - - /** - * The token produced by the pfor operator to reference the execution - * of a parallel loop. - */ - template - class loop_reference; - - /** - * A marker type for loop dependencies. - */ - struct loop_dependency {}; - - /** - * A test for loop dependencies. - */ - template - struct is_loop_dependency : public std::is_base_of {}; - - /** - * A small container for splitting dependencies. 
- */ - template - struct SubDependencies { - Dependency left; - Dependency right; - }; - - } // end namespace detail - - /** - * The dependency to be used if no dependencies are required. - */ - struct no_dependencies : public detail::loop_dependency { - - detail::SubDependencies split() const { - return detail::SubDependencies(); - } - - }; - - // --------------------------------------------------------------------------------------------- - // Basic Generic pfor Operators - // --------------------------------------------------------------------------------------------- - - /** - * The generic version of all parallel loops with synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam Body the type of the body operation, thus the operation to be applied on each element in the given range - * @tparam Dependency the type of the dependencies to be enforced - * - * @param r the range to iterate over - * @param body the operation to be applied on each element of the given range - * @param dependency the dependencies to be obeyed when scheduling the iterations of this parallel loop - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const Dependency& dependency); - - /** - * The generic version of all parallel loops without synchronization dependencies. 
- * - * @tparam Iter the type of the iterator to pass over - * @tparam Body the type of the body operation, thus the operation to be applied on each element in the given range - * - * @param r the range to iterate over - * @param body the operation to be applied on each element of the given range - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const no_dependencies& = no_dependencies()); - - - // --------------------------------------------------------------------------------------------- - // pfor Operators with Boundaries - // --------------------------------------------------------------------------------------------- - - /** - * The generic version of all parallel loops with synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam InnerBody the type of the inner body operation, thus the operation to be applied on each element in the given range that is not on the surface - * @tparam BoundaryBody the type of the boundary body operation, thus the operation to be applied on each element in the given range that is on the surface - * @tparam Dependency the type of the dependencies to be enforced - * - * @param r the range to iterate over - * @param innerBody the operation to be applied on each element of the given range that is not on the surface - * @param boundaryBody the operation to be applied on each element of the given range that is on the surface - * @param dependency the dependencies to be obeyed when scheduling the iterations of this parallel loop - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency); - - /** - * The generic 
version of all parallel loops without synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam InnerBody the type of the inner body operation, thus the operation to be applied on each element in the given range that is not on the surface - * @tparam BoundaryBody the type of the boundary body operation, thus the operation to be applied on each element in the given range that is on the surface - * - * @param r the range to iterate over - * @param innerBody the operation to be applied on each element of the given range that is not on the surface - * @param boundaryBody the operation to be applied on each element of the given range that is on the surface - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const no_dependencies& = no_dependencies()); - - - // --------------------------------------------------------------------------------------------- - // The after Utility - // --------------------------------------------------------------------------------------------- - - /** - * A generic utility for inserting a single action into a single a chain of dependencies. The given action will be triggered - * once the corresponding iteration in the given loop reference has been completed. The resulting loop reference can be utilized - * by consecutive operations to synchronize on the completion of the concatenation of the given loop reference and inserted action. 
- * - * @tparam Iter the type of iterator the preceding loop operated on - * @tparam Point the iterator value of the point this action shell be associated to - * @tparam Action the type of action to be performed - * - * @param loop preceding loop - * @param point the point to which this event shell be associated to - * @param action the action to be performed - * @return a customized loop reference to sync upon the concatenation of this - */ - template - detail::loop_reference after(const detail::loop_reference& loop, const Point& point, const Action& action); - - - // --------------------------------------------------------------------------------------------- - // adapters for the pfor operator - // --------------------------------------------------------------------------------------------- - - template - detail::loop_reference> pfor(const std::array& a, const std::array& b, const Body& body) { - return pfor(detail::range>(a,b),body); - } - - template - detail::loop_reference> pfor(const std::array& a, const std::array& b, const Body& body, const Dependency& dependency) { - return pfor(detail::range>(a,b),body,dependency); - } - - template - detail::loop_reference> pforWithBoundary(const std::array& a, const std::array& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody) { - return pforWithBoundary(detail::range>(a,b),innerBody,boundaryBody); - } - - template - detail::loop_reference> pforWithBoundary(const std::array& a, const std::array& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - return pforWithBoundary(detail::range>(a,b),innerBody,boundaryBody,dependency); - } - - /** - * A parallel for-each implementation iterating over the given range of elements. 
- */ - template - detail::loop_reference pfor(const Iter& a, const Iter& b, const Body& body, const Dependency& dependency) { - return pfor(detail::range(a,b),body,dependency); - } - - template - detail::loop_reference pfor(const Iter& a, const Iter& b, const Body& body) { - return pfor(a,b,body,no_dependencies()); - } - - template - detail::loop_reference pforWithBoundary(const Iter& a, const Iter& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody) { - return pforWithBoundary(detail::range(a,b),innerBody,boundaryBody); - } - - template - detail::loop_reference pforWithBoundary(const Iter& a, const Iter& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - return pforWithBoundary(detail::range(a,b),innerBody,boundaryBody,dependency); - } - - // ---- container support ---- - - /** - * A parallel for-each implementation iterating over the elements of the given, mutable container. - */ - template - detail::loop_reference - pfor(Container& c, const Op& op) { - return pfor(c.begin(), c.end(), op); - } - - /** - * A parallel for-each implementation iterating over the elements of the given, mutable container. - */ - template - std::enable_if_t::value,detail::loop_reference> - pfor(Container& c, const Op& op, const Dependency& dependency) { - return pfor(c.begin(), c.end(), op, dependency); - } - - - /** - * A parallel for-each implementation iterating over the elements of the given container. - */ - template - detail::loop_reference - pfor(const Container& c, const Op& op) { - return pfor(c.begin(), c.end(), op); - } - - /** - * A parallel for-each implementation iterating over the elements of the given container. 
- */ - template - detail::loop_reference - pfor(const Container& c, const Op& op, const Dependency& dependency) { - return pfor(c.begin(), c.end(), op, dependency); - } - - - // ---- Vector support ---- - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vectors. - */ - template - detail::loop_reference> pfor(const utils::Vector& a, const utils::Vector& b, const Body& body) { - return pfor(detail::range>(a,b),body); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vectors. Optional dependencies may be passed. - */ - template - detail::loop_reference> pfor(const utils::Vector& a, const utils::Vector& b, const Body& body, const Dependencies& dependencies) { - return pfor(detail::range>(a,b),body,dependencies); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vector. - */ - template - auto pfor(const utils::Vector& a, const Body& body) { - return pfor(utils::Vector(0),a,body); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vector. Optional dependencies may be passed. - */ - template - auto pfor(const utils::Vector& a, const Body& body, const Dependencies& dependencies) { - return pfor(utils::Vector(0),a,body,dependencies); - } - - // ------------------------------------------------------------------------------------------- - // Adaptive Synchronization - // ------------------------------------------------------------------------------------------- - - - /** - * A dependency forming the conjunction of a list of given dependencies. - */ - template - class conjunction_sync_dependency; - - /** - * A factory for a conjunction of dependencies. 
- */ - template - conjunction_sync_dependency sync_all(const Dependencies& ... dependencies) { - return conjunction_sync_dependency(dependencies...); - } - - /** - * A dependency actually representing no dependency. Could be used as a place-holder. - */ - class no_dependency; - - /** - * A factory for no synchronization dependencies. Could be used as a place-holder. - */ - no_dependency no_sync(); - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iteration i of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class one_on_one_dependency; - - /** - * A factory for one_on_one dependencies. - */ - template - one_on_one_dependency one_on_one(const detail::loop_reference& dep) { - return one_on_one_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iterations { i + c | c \in {-1,0,1}^n && |c| <= 1 } of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class small_neighborhood_sync_dependency; - - /** - * A factory for small neighborhood sync dependencies. - */ - template - small_neighborhood_sync_dependency small_neighborhood_sync(const detail::loop_reference& dep) { - return small_neighborhood_sync_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iterations { i + c | c \in {-1,0,1}^n } of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class full_neighborhood_sync_dependency; - - /** - * A factory for full neighborhood sync dependencies. 
- */ - template - full_neighborhood_sync_dependency full_neighborhood_sync(const detail::loop_reference& dep) { - return full_neighborhood_sync_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon the entire range of a given loop has been executed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class after_all_sync_dependency; - - /** - * A factory for after-all sync dependencies. - */ - template - after_all_sync_dependency after_all_sync(const detail::loop_reference& dep) { - return after_all_sync_dependency(dep); - } - - - // ------------------------------------------------------------------------------------------- - // Range Utils - // ------------------------------------------------------------------------------------------- - - - namespace detail { - - // -- obtain number of dimensions of an iterator -- - - template - struct dimensions { - enum { value = 1 }; - }; - - template - struct dimensions> { - enum { value = D }; - }; - - template - struct dimensions> { - enum { value = D }; - }; - - // -- distances between begin and end of iterators -- - - template - struct volume { - size_t operator()(const Iter& a, const Iter& b) const { - return std::distance(a,b); - } - }; - - template - struct volume::value,bool>> { - size_t operator()(Int a, Int b) const { - return (a < b) ? 
b-a : 0; - } - }; - - template - struct volume> { - size_t operator()(const std::array& a, const std::array& b) const { - volume inner; - size_t res = 1; - for(size_t i = 0; i - struct volume> { - size_t operator()(const utils::Vector& a, const utils::Vector& b) const { - return volume>()(a,b); - } - }; - - // -- minimum distance between elements along individual dimensions -- - - template - struct min_dimension_length { - size_t operator()(const Iter& a, const Iter& b) const { - return std::distance(a,b); - } - }; - - template - struct min_dimension_length::value,bool>> { - size_t operator()(Int a, Int b) const { - return (a < b) ? b-a : 0; - } - }; - - template - struct min_dimension_length> { - size_t operator()(const std::array& a, const std::array& b) const { - min_dimension_length inner; - size_t res = std::numeric_limits::max(); - for(size_t i = 0; i - struct min_dimension_length> { - size_t operator()(const utils::Vector& a, const utils::Vector& b) const { - return min_dimension_length>()(a,b); - } - }; - - template - size_t getMinimumDimensionLength(const range& r) { - return min_dimension_length()(r.begin(),r.end()); - } - - // -- coverage -- - - template - bool covers(const Iter& a_begin, const Iter& a_end, const Iter& b_begin, const Iter& b_end) { - return b_begin >= b_end || (a_begin <= b_begin && b_end <= a_end); - } - - template - bool covers(const utils::Vector& a_begin, const utils::Vector& a_end, const utils::Vector& b_begin, const utils::Vector& b_end) { - // if the second is empty, it is covered - for(size_t i=0; i= b_end[i]) return true; - } - // check that a non-empty range is covered - for(size_t i=0; i - bool covers(const Iter& begin, const Iter& end, const Point& p) { - return begin <= p && p < end; - } - - template - bool covers(const utils::Vector& begin, const utils::Vector& end, const utils::Vector& point) { - for(size_t i=0; i - auto access(const Iter& iter) -> decltype(*iter) { - return *iter; - } - - template - typename 
std::enable_if::value,T>::type access(T a) { - return a; - } - - - // -- scan utility -- - - template - void forEach(const Iter& fullBegin, const Iter& fullEnd, const Iter& a, const Iter& b, const InnerOp& inner, const BoundaryOp& boundary) { - - // cut off empty loop - if (a == b) return; - - // get inner range - Iter innerBegin = a; - Iter innerEnd = b; - - // check for boundaries - if (fullBegin == a) { - boundary(access(a)); - innerBegin++; - } - - // reduce inner range if b is the end - if (fullEnd == b) { - innerEnd--; - } - - // process inner part - for(auto it = innerBegin; it != innerEnd; ++it) { - inner(access(it)); - } - - // process left boundary - if(fullEnd == b) { - boundary(access(b-1)); - } - } - - - template - void forEach(const Iter& a, const Iter& b, const InnerOp& inner, const BoundaryOp& boundary) { - - // cut off empty loop - if (a == b) return; - - // process left boundary - boundary(access(a)); - if (a + 1 == b) return; - - // process inner part - for(auto it = a+1; it != b-1; ++it) { - inner(access(it)); - } - - // process left boundary - boundary(access(b-1)); - } - - template - void forEach(const Iter& a, const Iter& b, const Op& op) { - for(auto it = a; it != b; ++it) { - op(access(it)); - } - } - - template - struct point_factory; - - template - struct point_factory> { - template - std::array operator()(Coordinates ... coordinates) { - return { { coordinates ... } }; - } - }; - - template - struct point_factory> { - template - utils::Vector operator()(Coordinates ... coordinates) { - return utils::Vector(coordinates...); - } - }; - - - template - struct scanner { - scanner nested; - template class Compound, typename Iter, size_t dims, typename Op, typename ... Coordinates> - void operator()(const Compound& begin, const Compound& end, const Op& op, Coordinates ... 
coordinates) { - auto a = begin[dims-idx]; - auto b = end[dims-idx]; - for(Iter i = a; i != b ; ++i) { - nested(begin,end,op,coordinates...,i); - } - } - }; - - template<> - struct scanner<0> { - template class Compound, typename Iter, size_t dims, typename Op, typename ... Coordinates> - void operator()(const Compound&, const Compound&, const Op& op, Coordinates ... coordinates) { - point_factory> factory; - op(factory(coordinates...)); - } - }; - - template - struct scanner_with_boundary { - scanner_with_boundary nested; - template class Compound, typename Iter, size_t dims, typename Op> - void operator()(const Compound& begin, const Compound& end, Compound& cur, const Op& op) { - auto& i = cur[dims-idx]; - for(i = begin[dims-idx]; i != end[dims-idx]; ++i ) { - nested(begin, end, cur, op); - } - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound& begin, const Compound& end, Compound& cur, const Inner& inner, const Boundary& boundary) { - auto& i = cur[dims-idx]; - - // extract range - const auto& a = begin[dims-idx]; - const auto& b = end[dims-idx]; - - // check empty range - if (a==b) return; - - // handle left boundary - i = a; nested(begin,end,cur,boundary); - - // check whether this has been all - if (a + 1 == b) return; - - // process inner part - for(i = a+1; i!=b-1; ++i) { - nested(begin,end,cur,inner,boundary); - } - - // handle right boundary - i = b-1; - nested(begin,end,cur,boundary); - } - - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound& fullBegin, const Compound& fullEnd, const Compound& begin, const Compound& end, Compound& cur, const Inner& inner, const Boundary& boundary) { - auto& i = cur[dims-idx]; - - // extract range - const auto& fa = fullBegin[dims-idx]; - const auto& fb = fullEnd[dims-idx]; - - const auto& a = begin[dims-idx]; - const auto& b = end[dims-idx]; - - // check empty range - if 
(a==b) return; - - // get inner range - auto ia = a; - auto ib = b; - - // handle left boundary - if (fa == ia) { - i = ia; - nested(begin,end,cur,boundary); - ia++; - } - - if (fb == b) { - ib--; - } - - // process inner part - for(i = ia; i!=ib; ++i) { - nested(fullBegin,fullEnd,begin,end,cur,inner,boundary); - } - - // handle right boundary - if (fb == b) { - i = b-1; - nested(begin,end,cur,boundary); - } - } - }; - - template<> - struct scanner_with_boundary<0> { - template class Compound, typename Iter, size_t dims, typename Op> - void operator()(const Compound&, const Compound&, Compound& cur, const Op& op) { - op(cur); - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound&, const Compound&, Compound& cur, const Inner& inner, const Boundary&) { - inner(cur); - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound&, const Compound&, const Compound&, const Compound&, Compound& cur, const Inner& inner, const Boundary&) { - inner(cur); - } - }; - - template - void forEach(const std::array& fullBegin, const std::array& fullEnd, const std::array& begin, const std::array& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - std::array cur; - - // scan range - detail::scanner_with_boundary()(fullBegin, fullEnd, begin, end, cur, inner, boundary); - } - - template - void forEach(const std::array& begin, const std::array& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - std::array cur; - - // scan range - detail::scanner_with_boundary()(begin, end, cur, inner, boundary); - } - - template - void forEach(const std::array& begin, const std::array& end, const Op& op) { - // scan range - detail::scanner()(begin, end, op); - } - - template - void forEach(const utils::Vector& fullBegin, const utils::Vector& fullEnd, const utils::Vector& begin, const utils::Vector& 
end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - utils::Vector cur; - - // scan range - detail::scanner_with_boundary()(fullBegin, fullEnd, begin, end, cur, inner, boundary); - } - - template - void forEach(const utils::Vector& begin, const utils::Vector& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - utils::Vector cur; - - // scan range - detail::scanner_with_boundary()(begin, end, cur, inner, boundary); - } - - - template - void forEach(const utils::Vector& begin, const utils::Vector& end, const Op& op) { - // scan range - detail::scanner()(begin, end, op); - } - - - template - Iter grow(const Iter& value, const Iter& limit, int steps) { - return std::min(limit, value+steps); - } - - template - std::array grow(const std::array& value, const std::array& limit, int steps) { - std::array res; - for(unsigned i=0; i - utils::Vector grow(const utils::Vector& value, const utils::Vector& limit, int steps) { - utils::Vector res; - for(unsigned i=0; i - Iter shrink(const Iter& value, const Iter& limit, int steps) { - return std::max(limit, value-steps); - } - - template - std::array shrink(const std::array& value, const std::array& limit, int steps) { - std::array res; - for(unsigned i=0; i - utils::Vector shrink(const utils::Vector& value, const utils::Vector& limit, int steps) { - utils::Vector res; - for(unsigned i=0; i - struct fragments { - range left; - range right; - }; - - template - fragments make_fragments(const range& left, const range& right) { - return fragments{ left, right }; - } - - template - struct range_spliter; - - /** - * The object representing the iterator range of a (parallel) loop. - */ - template - class range { - - /** - * The begin of this range (inclusive). - */ - Iter _begin; - - /** - * The end of this range (exclusive). 
- */ - Iter _end; - - public: - - range() : _begin(), _end() {} - - range(const Iter& begin, const Iter& end) - : _begin(begin), _end(end) { - if (empty()) { _end = _begin; } - } - - size_t size() const { - return detail::volume()(_begin,_end); - } - - bool empty() const { - return size() == 0; - } - - const Iter& begin() const { - return _begin; - } - - const Iter& end() const { - return _end; - } - - bool covers(const range& r) const { - return detail::covers(_begin,_end,r._begin,r._end); - } - - template - bool covers(const Point& p) const { - return detail::covers(_begin,_end,p); - } - - range grow(const range& limit, int steps = 1) const { - return range( - detail::shrink(_begin,limit.begin(),steps), - detail::grow(_end,limit.end(),steps) - ); - } - - range shrink(int steps = 1) const { - return grow(*this, -steps); - } - - fragments split(std::size_t depth) const { - return range_spliter::split(depth,*this); - } - - template - void forEach(const Op& op) const { - detail::forEach(_begin,_end,op); - } - - template - void forEachWithBoundary(const range& full, const InnerOp& inner, const BoundaryOp& boundary) const { - detail::forEach(full._begin,full._end,_begin,_end,inner,boundary); - } - - friend std::ostream& operator<<(std::ostream& out, const range& r) { - return out << "[" << r.begin() << "," << r.end() << ")"; - } - - }; - - template - struct range_spliter { - - using rng = range; - - static fragments split(std::size_t, const rng& r) { - const auto& a = r.begin(); - const auto& b = r.end(); - auto m = a + (b - a)/2; - return make_fragments(rng(a,m),rng(m,b)); - } - - static std::size_t getSplitDimension(std::size_t) { - return 0; - } - }; - - template< - template class Container, - typename Iter, size_t dims - > - struct range_spliter> { - - using rng = range>; - - static fragments> split(std::size_t depth, const rng& r) { - - __allscale_unused const auto volume = detail::volume>(); - - // get split dimension - auto splitDim = getSplitDimension(depth); - 
- // compute range fragments - const auto& begin = r.begin(); - const auto& end = r.end(); - - // split the longest dimension, keep the others as they are - auto midA = end; - auto midB = begin; - midA[splitDim] = midB[splitDim] = range_spliter::split(depth,range(begin[splitDim],end[splitDim])).left.end(); - - // make sure no points got lost - assert_eq(volume(begin,end), volume(begin,midA) + volume(midB,end)); - - // create result - return make_fragments(rng(begin,midA),rng(midB,end)); - } - - static std::size_t getSplitDimension(std::size_t depth) { - return depth % dims; - } - - }; - - } // end namespace detail - - - - // ------------------------------------------------------------------------------------------- - // Synchronization Definitions - // ------------------------------------------------------------------------------------------- - - namespace detail { - - /** - * An entity to reference ranges of iterations of a loop. - */ - template - class iteration_reference { - - /** - * The range covered by the iterations referenced by this object. - */ - range _range; - - /** - * The reference to the task processing the covered range. - */ - core::task_reference handle; - - /** - * The recursive depth of the referenced iteration range. 
- */ - std::size_t depth; - - public: - - iteration_reference(const range& range, const core::task_reference& handle, std::size_t depth) - : _range(range), handle(handle), depth(depth) {} - - iteration_reference(const range& _range = range()) : _range(_range), depth(0) {} - - iteration_reference(const iteration_reference&) = default; - iteration_reference(iteration_reference&&) = default; - - iteration_reference& operator=(const iteration_reference&) = default; - iteration_reference& operator=(iteration_reference&&) = default; - - void wait() const { - if (handle.valid()) handle.wait(); - } - - iteration_reference getLeft() const { - return { range_spliter::split(depth,_range).left, handle.getLeft(), depth+1 }; - } - - iteration_reference getRight() const { - return { range_spliter::split(depth,_range).right, handle.getRight(), depth+1 }; - } - - operator core::task_reference() const { - return handle; - } - - const range& getRange() const { - return _range; - } - - const core::task_reference& getHandle() const { - return handle; - } - - std::size_t getDepth() const { - return depth; - } - }; - - - /** - * An entity to reference the full range of iterations of a loop. This token - * can not be copied and will wait for the completion of the loop upon destruction. 
- */ - template - class loop_reference : public iteration_reference { - - public: - - loop_reference(const range& range, core::treeture&& handle) - : iteration_reference(range, std::move(handle), 0) {} - - loop_reference() {}; - loop_reference(const loop_reference&) = delete; - loop_reference(loop_reference&&) = default; - - loop_reference& operator=(const loop_reference&) = delete; - loop_reference& operator=(loop_reference&&) = default; - - ~loop_reference() { this->wait(); } - - }; - - } // end namespace detail - - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const Dependency& dependency) { - - struct RecArgs { - std::size_t depth; - detail::range range; - Dependency dependencies; - }; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& rg) { - // if there is only one element left, we reached the base case - return rg.range.size() <= 1; - }, - [body](const RecArgs& rg) { - // apply the body operation to every element in the remaining range - rg.range.forEach(body); - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1, left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1, right,dep.right}) - ); - }, - [body](const RecArgs& rg, const auto&) { - // the alternative is processing the step sequentially - rg.range.forEach(body); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - - template - 
detail::loop_reference pfor(const detail::range& r, const Body& body, const no_dependencies&) { - - struct RecArgs { - std::size_t depth; - detail::range range; - }; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& r) { - // if there is only one element left, we reached the base case - return r.range.size() <= 1; - }, - [body](const RecArgs& r) { - // apply the body operation to every element in the remaining range - r.range.forEach(body); - }, - core::pick( - [](const RecArgs& r, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = r.range.split(r.depth); - return core::parallel( - nested(RecArgs{r.depth+1,fragments.left}), - nested(RecArgs{r.depth+1,fragments.right}) - ); - }, - [body](const RecArgs& r, const auto&) { - // the alternative is processing the step sequentially - r.range.forEach(body); - } - ) - )(RecArgs{0,r}) }; - } - - class no_dependency : public detail::loop_dependency { - - public: - - auto toCoreDependencies() const { - return core::after(); - } - - template - detail::SubDependencies split(const Range&, const Range&) const { - // split dependencies, which is actually nothing to do ... - return { no_dependency(), no_dependency() }; - - } - - friend std::ostream& operator<< (std::ostream& out, const no_dependency&) { - return out << "none"; - } - - }; - - inline no_dependency no_sync() { - return no_dependency(); - } - - // -------------------------------------------------------------------------------------------------------- - - template - class conjunction_sync_dependency : public detail::loop_dependency { - - using nested_type = conjunction_sync_dependency; - - First first; - - nested_type nested; - - conjunction_sync_dependency(const First& first, const nested_type& nested) - : first(first), nested(nested) {} - - public: - - conjunction_sync_dependency(const First& first, const Rest& ... rest) - : first(first), nested(rest...) 
{} - - auto toCoreDependencies() const { - return concat(first.toCoreDependencies(),nested.toCoreDependencies()); - } - - template - detail::SubDependencies split(const detail::range& left, const detail::range& right) const { - - // get fragments - auto firstFragments = first.split(left,right); - auto nestedFragments = nested.split(left,right); - - // create resulting dependencies - return { - { firstFragments.left, nestedFragments.left }, - { firstFragments.right, nestedFragments.right } - }; - - } - - friend std::ostream& operator<< (std::ostream& out, const conjunction_sync_dependency& dep) { - return out << dep.first << " && " << dep.nested; - } - - }; - - // special case for a conjunction of a single dependency - this is just that dependency - template - class conjunction_sync_dependency : public Dependency { - public: - conjunction_sync_dependency(const Dependency& dep) : Dependency(dep) {} - }; - - // special case for an empty conjunction - this is no dependency - template<> - class conjunction_sync_dependency<> : public no_dependency { - public: - conjunction_sync_dependency() : no_dependency() {} - conjunction_sync_dependency(const no_dependency& dep) : no_dependency(dep) {} - }; - - // -------------------------------------------------------------------------------------------------------- - - template - class one_on_one_dependency : public detail::loop_dependency { - - detail::iteration_reference loop; - - public: - - one_on_one_dependency(const detail::iteration_reference& loop) - : loop(loop) {} - - auto getCenterRange() const { - return loop.getRange(); - } - - core::impl::reference::dependencies> toCoreDependencies() const { - return core::after(loop.getHandle()); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - - // get left and right loop fragments - auto loopLeft = loop.getLeft(); - auto loopRight = loop.getRight(); - - // split dependencies, thereby checking range coverage - return { - // we 
take the sub-task if it covers the targeted range, otherwise we stick to the current range - loopLeft.getRange().covers(left) ? one_on_one_dependency{loopLeft} : *this, - loopRight.getRange().covers(right) ? one_on_one_dependency{loopRight} : *this - }; - - } - - friend std::ostream& operator<< (std::ostream& out, const one_on_one_dependency& dep) { - return out << dep.loop.getRange(); - } - - }; - - - template - class small_neighborhood_sync_dependency : public detail::loop_dependency { - - // determine the number of dimensions - enum { num_dimensions = detail::dimensions::value }; - - // the type of iteration dependency - using iteration_reference = detail::iteration_reference; - - // on each dimension, two dependencies are stored in each direction - struct deps_pair { - iteration_reference left; - iteration_reference right; - }; - - // save two dependencies for each dimension - using deps_list = std::array; - - // on dependency covering the central area - iteration_reference center; - - // the neighboring dependencies - deps_list neighborhood; - - // and internal constructor required by the split operation - small_neighborhood_sync_dependency() {} - - public: - - small_neighborhood_sync_dependency(const iteration_reference& loop) - : center(loop), neighborhood() {} - - const detail::range& getCenterRange() const { - return center.getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - res.push_back(center.getRange()); - for(std::size_t i=0; i - core::impl::reference::dependencies> toCoreDependencies(const std::index_sequence&) const { - return core::after( - center, - neighborhood[Dims].left ..., - neighborhood[Dims].right ... 
- ); - } - - public: - - core::impl::reference::dependencies> toCoreDependencies() const { - return toCoreDependencies(std::make_index_sequence()); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - - using splitter = detail::range_spliter; - - // create new left and right dependencies - small_neighborhood_sync_dependency res_left; - small_neighborhood_sync_dependency res_right; - - // update center - res_left.center = center.getLeft(); - res_right.center = center.getRight(); - - // update neighbors except split dimension - bool save_left = true; - bool save_right = true; - auto splitDim = splitter::getSplitDimension(center.getDepth()); - for(std::size_t i =0; i - struct full_dependency_block { - - using iteration_reference = detail::iteration_reference; - - using nested = full_dependency_block; - - enum { num_dependencies = nested::num_dependencies * 3 }; - - std::array dependencies; - - void setCenter(const iteration_reference& ref) { - dependencies[1].setCenter(ref); - } - - const iteration_reference& getCenter() const { - return dependencies[1].getCenter(); - } - - template - void forEach(const Op& op) const { - dependencies[0].forEach(op); - dependencies[1].forEach(op); - dependencies[2].forEach(op); - } - - core::impl::reference::dependencies> toCoreDependencies() const { - return produceCoreDependencies(*this); - } - - template - static auto produceCoreDependencies(const Blocks& ... 
blocks) { - return nested::template produceCoreDependencies(blocks.dependencies[0]...,blocks.dependencies[1]...,blocks.dependencies[2]...); - } - - full_dependency_block narrowLeft(bool& save, std::size_t splitDimension, std::size_t radius) const { - full_dependency_block res; - if (Dims - 1 == splitDimension) { - res.dependencies[0] = dependencies[0].narrowRight(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[2] = dependencies[1].narrowRight(save,splitDimension, radius); - } else { - res.dependencies[0] = dependencies[0].narrowLeft(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowLeft(save,splitDimension, radius); - } - return res; - } - - full_dependency_block narrowRight(bool& save, std::size_t splitDimension, std::size_t radius) const { - full_dependency_block res; - if (Dims - 1 == splitDimension) { - res.dependencies[0] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowRight(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowLeft(save,splitDimension, radius); - } else { - res.dependencies[0] = dependencies[0].narrowRight(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowRight(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowRight(save,splitDimension, radius); - } - return res; - } - }; - - template - struct full_dependency_block { - - using iteration_reference = detail::iteration_reference; - - enum { num_dependencies = 1 }; - - iteration_reference dependency; - - void setCenter(const iteration_reference& ref) { - dependency = ref; - } - - const iteration_reference& getCenter() const { - return dependency; - } - - template - void forEach(const Op& op) const { - op(dependency); - } - - core::impl::reference::dependencies> toCoreDependencies() 
const { - return core::after(dependency); - } - - template - static auto produceCoreDependencies(const Blocks& ... blocks) { - return core::after(blocks.dependency...); - } - - full_dependency_block narrowLeft(bool& save, std::size_t, std::size_t radius) const { - full_dependency_block res; - res.dependency = dependency.getLeft(); - if (!dependency.getRange().empty() && getMinimumDimensionLength(res.dependency.getRange()) < radius) save = false; - return res; - } - - full_dependency_block narrowRight(bool& save, std::size_t, std::size_t radius) const { - full_dependency_block res; - res.dependency = dependency.getRight(); - if (!dependency.getRange().empty() && getMinimumDimensionLength(res.dependency.getRange()) < radius) save = false; - return res; - } - }; - - } - - template - class full_neighborhood_sync_dependency : public detail::loop_dependency { - - enum { num_dimensions = detail::dimensions::value }; - - using deps_block = detail::full_dependency_block; - - deps_block deps; - - full_neighborhood_sync_dependency(const deps_block& deps) : deps(deps) {} - - public: - - full_neighborhood_sync_dependency(const detail::iteration_reference& loop) : deps() { - deps.setCenter(loop); - } - - const detail::range& getCenterRange() const { - return deps.getCenter().getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - deps.forEach([&](const auto& dep) { - if (!dep.getRange().empty()) res.push_back(dep.getRange()); - }); - return res; - } - - auto toCoreDependencies() const { - return deps.toCoreDependencies(); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - using splitter = detail::range_spliter; - - auto splitDim = splitter::getSplitDimension(deps.getCenter().getDepth()); - - // prepare safety flag - bool save_left = true; - bool save_right = true; - - // compute left and right sub-dependencies - full_neighborhood_sync_dependency res_left(deps.narrowLeft(save_left,splitDim,radius)); - 
full_neighborhood_sync_dependency res_right(deps.narrowRight(save_right,splitDim,radius)); - - // check coverage and build up result - return { - save_left && res_left.getCenterRange().covers(left) ? res_left : *this, - save_right && res_right.getCenterRange().covers(right) ? res_right : *this - }; - } - - friend std::ostream& operator<< (std::ostream& out, const full_neighborhood_sync_dependency& dep) { - return out << "[" << utils::join(",", dep.getRanges()) << "]"; - } - - }; - - - template - class after_all_sync_dependency : public detail::loop_dependency { - - // the type of iteration dependency - using iteration_reference = detail::iteration_reference; - - iteration_reference dependency; - - public: - - after_all_sync_dependency(const detail::iteration_reference& loop) - : dependency(loop) {} - - const detail::range& getCenterRange() const { - return dependency.getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - res.push_back(dependency.getRange()); - return res; - } - - auto toCoreDependencies() const { - return core::after(dependency); - } - - detail::SubDependencies> split(const detail::range&, const detail::range&) const { - // this dependency never changes - return { *this, *this }; - } - - friend std::ostream& operator<< (std::ostream& out, const after_all_sync_dependency& dep) { - return out << "[" << dep.getCenterRange() << "]"; - } - - }; - - - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - - struct RecArgs { - std::size_t depth; - detail::range range; - Dependency dependencies; - }; - - // keep a copy of the full range - auto full = r; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& rg) { - // if there is only one element left, we reached the base case - return rg.range.size() <= 1; - }, - [innerBody,boundaryBody,full](const RecArgs& rg) { - // apply the body operation to 
every element in the remaining range - rg.range.forEachWithBoundary(full,innerBody,boundaryBody); - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1,left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1,right,dep.right}) - ); - }, - [innerBody,boundaryBody,full](const RecArgs& rg, const auto&) { - // the alternative is processing the step sequentially - rg.range.forEachWithBoundary(full,innerBody,boundaryBody); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const no_dependencies&) { - - struct RecArgs { - std::size_t depth; - detail::range range; - }; - - // keep a copy of the full range - auto full = r; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& r) { - // if there is only one element left, we reached the base case - return r.range.size() <= 1; - }, - [innerBody,boundaryBody,full](const RecArgs& r) { - // apply the body operation to every element in the remaining range - r.range.forEachWithBoundary(full,innerBody,boundaryBody); - }, - core::pick( - [](const RecArgs& r, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = r.range.split(r.depth); - auto& left = fragments.left; - auto& right = fragments.right; - return core::parallel( - nested(RecArgs{ r.depth+1, left }), - nested(RecArgs{ r.depth+1, right }) - ); - }, - [innerBody,boundaryBody,full](const RecArgs& r, const auto&) { - // the alternative is processing the step sequentially - 
r.range.forEachWithBoundary(full,innerBody,boundaryBody); - } - ) - )(RecArgs{ 0 , r }) }; - } - - - - - template - detail::loop_reference after(const detail::loop_reference& loop, const Point& point, const Action& action) { - - // get the full range - auto r = loop.getRange(); - - struct RecArgs { - std::size_t depth; - detail::range range; - one_on_one_dependency dependencies; - }; - - // get the initial dependency - auto dependency = one_on_one(loop); - - // trigger parallel processing - return { r, core::prec( - [point](const RecArgs& rg) { - // check whether the point of action is covered by the current range - return !rg.range.covers(point); - }, - [action,point](const RecArgs& rg) { - // trigger the action if the current range covers the point - if (rg.range.covers(point)) action(); - - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1, left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1, right,dep.right}) - ); - }, - [action,point](const RecArgs& rg, const auto&) { - // trigger the action if the current range covers the point - if (rg.range.covers(point)) action(); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/preduce.h b/vendor/allscale/api/user/algorithm/preduce.h deleted file mode 100644 index a7e8616a5..000000000 --- a/vendor/allscale/api/user/algorithm/preduce.h +++ /dev/null @@ -1,205 +0,0 @@ -#pragma once - -#include - -#include "allscale/api/core/prec.h" - -#include "allscale/api/user/algorithm/pfor.h" - -#include 
"allscale/utils/assert.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // ----- fold / reduce ------ - - /** - * The most generic implementation of the reduction operator. All other - * reductions are reduced to this implementation. - * - * @param a the begin of a range of elements to be reduced - * @param b the end (exclusive) of a range of elements to be reduced - * @param reduce the operation capable of performing a reduction over a subrange - * @param aggregate the operation capable of performing a reduction over a subrange - */ - template< - typename Iter, - typename RangeReductionOp, - typename AggregationOp - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const RangeReductionOp& reduce, - const AggregationOp& aggregate - ) { - - using res_type = typename utils::lambda_traits::result_type; - - // define the argument struct - struct RecArgs { - std::size_t depth; - algorithm::detail::range range; - }; - - return core::prec( - [](const RecArgs& r) { - return r.range.size() <= 1; - }, - [reduce](const RecArgs& r)->res_type { - return reduce(r.range.begin(),r.range.end()); - }, - core::pick( - [aggregate](const RecArgs& r, const auto& nested) { - // here we have the binary splitting - auto fragments = r.range.split(r.depth); - auto left = fragments.left; - auto right = fragments.right; - return core::combine(nested(RecArgs{ r.depth+1, left }),nested(RecArgs{ r.depth+1, right }),aggregate); - }, - [reduce](const RecArgs& r, const auto&)->res_type { - return reduce(r.range.begin(),r.range.end()); - } - ) - )(RecArgs{ 0, algorithm::detail::range(a, b) }); - } - - - - /** - * A variant of the preduce operator where the range based reduction step - * is assembled from a set of utilities to create, update, and reduce a local temporary value. 
- */ - template< - typename Iter, - typename FoldOp, - typename ReduceOp, - typename InitLocalState, - typename FinishLocalState - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const FoldOp& fold, - const ReduceOp& reduce, - const InitLocalState& init, - const FinishLocalState& finish - ) { - - return preduce( - a, b, [init,fold,finish](const Iter& a, const Iter& b) { - auto res = init(); - algorithm::detail::range(a,b).forEach([&](const auto& cur){ - fold(cur,res); - }); - return finish(res); - }, - reduce - ); - - } - - // ----- reduction ------ - - template - core::treeture::result_type> - preduce(const Iter& a, const Iter& b, const Op& op) { - using res_type = typename utils::lambda_traits::result_type; - - return preduce( - a,b, - [op](const res_type& cur, res_type& res) { - res = op(cur,res); - }, - op, - [](){ return res_type(); }, - [](const res_type& r) { return r; } - ); - - } - - /** - * A parallel reduce implementation over the elements of the given container. - */ - template - core::treeture::result_type> - preduce(Container& c, Op& op) { - return preduce(c.begin(), c.end(), op); - } - - /** - * A parallel reduce implementation over the elements of the given container. 
- */ - template - core::treeture::result_type> - preduce(const Container& c, const Op& op) { - return preduce(c.begin(), c.end(), op); - } - - - template< - typename Iter, - typename MapOp, - typename ReduceOp, - typename InitLocalState - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init - ) { - - return preduce(a, b, map, reduce, init, ([](typename utils::lambda_traits::result_type r) { return r; } )); - } - - template< - typename Container, - typename MapOp, - typename ReduceOp, - typename InitLocalState, - typename ReduceLocalState - > - core::treeture::result_type> - preduce( - const Container& c, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init, - const ReduceLocalState& exit - ) { - - return preduce(c.begin(), c.end(), map, reduce, init, exit); - - } - - template< - typename Container, - typename MapOp, - typename ReduceOp, - typename InitLocalState - > - core::treeture::result_type> - preduce( - const Container& c, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init - ) { - - return preduce(c.begin(), c.end(), map, reduce, init); - - } - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/stencil.h b/vendor/allscale/api/user/algorithm/stencil.h deleted file mode 100644 index 6677cc99a..000000000 --- a/vendor/allscale/api/user/algorithm/stencil.h +++ /dev/null @@ -1,1474 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/api/user/data/grid.h" -#include "allscale/api/user/data/static_grid.h" - -#include "allscale/api/user/algorithm/pfor.h" -#include "allscale/api/user/algorithm/async.h" -#include "allscale/api/user/algorithm/internal/operation_reference.h" - -#include "allscale/utils/bitmanipulation.h" -#include "allscale/utils/unused.h" -#include "allscale/utils/vector.h" - 
-namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - template - using Coordinate = utils::Vector; - - template - using Size = Coordinate; - - namespace implementation { - - struct sequential_iterative; - - struct coarse_grained_iterative; - - struct fine_grained_iterative; - - struct sequential_recursive; - - struct parallel_recursive; - - } - - template - class Observer; - - template - struct is_observer; - - template - class stencil_reference; - - template< - typename Impl = implementation::fine_grained_iterative, typename Container, typename InnerUpdate, typename BoundaryUpdate, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - std::enable_if_t::value,stencil_reference> stencil( - Container& res, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, - const Observer& ... observers - ); - - template< - typename Impl = implementation::fine_grained_iterative, typename Container, typename Update, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - stencil_reference stencil( - Container& res, std::size_t steps, const Update& update, - const Observer& ... 
observers - ); - - template - Observer observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action); - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - class stencil_reference : public internal::operation_reference { - - public: - - // inherit all constructors - using operation_reference::operation_reference; - - }; - - template< - typename Impl, typename Container, typename InnerUpdate, typename BoundaryUpdate, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - std::enable_if_t::value,stencil_reference> stencil( - Container& a, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, - const Observer& ... observers - ) { - - // forward everything to the implementation - return Impl().process(a,steps,innerUpdate,boundaryUpdate,observers...); - - } - - template< - typename Impl, typename Container, typename Update, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - stencil_reference stencil(Container& a, std::size_t steps, const Update& update,const Observer& ... 
observers) { - - // use the same update for inner and boundary updates - return stencil(a,steps,update,update,observers...); - - } - - template - class Observer { - public: - TimeStampFilter isInterestedInTime; - LocationFilter isInterestedInLocation; - Action trigger; - - Observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action) - : isInterestedInTime(timeFilter), isInterestedInLocation(locationFilter), trigger(action) {} - }; - - template - struct is_observer : public std::false_type {}; - - template - struct is_observer> : public std::true_type {}; - - template - Observer observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action) { - return Observer(timeFilter,locationFilter,action); - } - - namespace implementation { - - namespace detail { - - template - void staticForEach(const Op&) { - // nothing to do - } - - template - void staticForEach(const Op& op, const First& first, const Rest& ... rest) { - op(first); - staticForEach(op,rest...); - } - - } - - - // -- Iterative Stencil Implementation --------------------------------------------------------- - - struct sequential_iterative { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, const Observers& ... observers) { - - // return handle to asynchronous execution - return async([&a,steps,innerUpdate,boundaryUpdate,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - for(std::size_t t=0; t - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... 
observers) { - - // return handle to asynchronous execution - return async([&a,steps,inner,boundary,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - for(std::size_t t=0; t - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... observers) { - - // return handle to asynchronous execution - return async([&a,steps,inner,boundary,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - user::algorithm::detail::loop_reference ref; - - for(std::size_t t=0; t::element_type; - using time_type = std::size_t; - - - template - using Slopes = utils::Vector; - - template - class Base { - public: - - struct range { - index_type begin; - index_type end; - }; - - std::array boundaries; - - static Base zero() { - return full(0); - } - - static Base full(std::size_t size) { - static_assert(dims == 1, "This constructor only supports 1-d bases."); - Base res; - res.boundaries[0] = { 0, (index_type)size }; - return res; - } - - template - static Base full(const utils::Vector& size) { - Base res; - for(std::size_t i=0; i= cur.end) return 0; - res *= (cur.end - cur.begin); - } - return res; - } - - Coordinate extend() const { - Coordinate res; - for(std::size_t i = 0; i& other) const { - Base res; - for(std::size_t i=0; i - struct plain_scanner { - - plain_scanner nested; - - template - void operator()(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size) const { - constexpr const auto idx = full_dim - dim - 1; - - // compute boundaries - auto from = base[idx].begin; - auto to = base[idx].end; - auto length = size[idx]; - - // shift range to size window - if (from > length) { - from -= 
length; - to -= length; - } - - // process range from start to limit - auto limit = std::min(to,length); - processRange(base,inner,boundary,observer,pos,t,size,from,limit); - - // and if necessary the elements beyond, after a wrap-around - if (to <= length) return; - - to -= length; - processRange(base,inner,boundary,observer,pos,t,size,0,to); - } - - template - void processRange(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size, std::int64_t from, std::int64_t to) const { - constexpr const auto idx = full_dim - dim - 1; - - // skip an empty range - if (from >= to) return; - - // get inner range - auto innerFrom = from; - auto innerTo = to; - - // check left boundary - if (innerFrom == 0) { - - // process left as a boundary - pos[idx] = 0; - nested(base,boundary,boundary,observer,pos,t,size); - - // skip this one from the inner part - innerFrom++; - } - - // check right boundary - if (innerTo == size[idx]) { - innerTo--; - } - - // process inner part - for(pos[idx]=innerFrom; pos[idx] - struct plain_scanner<0> { - - template - void operator()(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size) const { - constexpr const auto idx = full_dim - 1; - - // compute boundaries - auto from = base[idx].begin; - auto to = base[idx].end; - auto length = size[idx]; - - // shift range to size window - if (from > length) { - from -= length; - to -= length; - } - - // process range from start to limit - auto limit = std::min(to,length); - processRange(inner,boundary,observer,pos,t,size,from,limit); - - // and if necessary the elements beyond, after a wrap-around - if (to <= length) return; - - to -= length; - processRange(inner,boundary,observer,pos,t,size,0,to); - } - - template - void processRange(const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& 
observer, Coordinate& pos, std::size_t t, const Coordinate& size, std::int64_t from, std::int64_t to) const { - constexpr const auto idx = full_dim - 1; - - // skip an empty range - if (from >= to) return; - - // get inner range - auto innerFrom = from; - auto innerTo = to; - - // check left boundary - if (innerFrom == 0) { - - // process left as a boundary - pos[idx] = 0; - boundary(pos,t); - - // skip this one from the inner part - innerFrom++; - } - - // check right boundary - if (innerTo == size[idx]) { - innerTo--; - } - - // process inner part - for(pos[idx]=innerFrom; pos[idx] - class TaskDependencyList { - - core::task_reference dep; - - TaskDependencyList nested; - - public: - - TaskDependencyList() {} - - template - TaskDependencyList(const core::task_reference& first, const Rest& ... rest) - : dep(first), nested(rest...) {} - - - // support conversion into core dependencies - auto toCoreDependencies() const { - return nested.toCoreDependencies(dep); - } - - template - auto toCoreDependencies(const Deps& ... deps) const { - return nested.toCoreDependencies(dep,deps...); - } - - }; - - template<> - class TaskDependencyList<0> { - - public: - - TaskDependencyList() {} - - // support conversion into core dependencies - auto toCoreDependencies() const { - return core::after(); - } - - template - auto toCoreDependencies(const Deps& ... 
deps) const { - return core::after(deps...); - } - - }; - - - template - class ZoidDependencies : public TaskDependencyList<3*dims> { - - using super = TaskDependencyList<3*dims>; - - public: - - // TODO: support dependency refinement - - // inherit constructors - using super::super; - - }; - - - template - class Zoid { - - Base base; // the projection of the zoid to the space dimensions - - Slopes slopes; // the direction of the slopes - - time_type t_begin; // the start time - time_type t_end; // the end time - - public: - - Zoid() {} - - Zoid(const Base& base, const Slopes& slopes, std::size_t t_begin, std::size_t t_end) - : base(base), slopes(slopes), t_begin(t_begin), t_end(t_end) {} - - - template - void forEach(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - - // TODO: make this one cache oblivious - - // create the plain scanner - plain_scanner scanner; - - Coordinate x; - auto plainBase = base; - - // over the time - for(std::size_t t = t_begin; t < t_end; ++t) { - - // process this plain - if ( t & 0x1 ) { - scanner(plainBase, odd, oddBoundary, oddObserver, x, t, limits); - } else { - scanner(plainBase, even, evenBoundary, evenObserver, x, t, limits); - } - - // update the plain for the next level - for(std::size_t i=0; i - core::treeture pforEach(const ZoidDependencies& deps, const EvenOd& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - - struct Params { - Zoid zoid; - ZoidDependencies deps; - }; - - // recursively decompose the covered space-time volume - return core::prec( - [](const Params& params) { - // check whether this zoid can no longer be divided - return params.zoid.isTerminal(); - }, - [&](const Params& params) { - // process final steps 
sequentially - params.zoid.forEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - }, - core::pick( - [](const Params& params, const auto& rec) { - // unpack parameters - const auto& zoid = params.zoid; - const auto& deps = params.deps; - - // make sure the zoid is not terminal - assert_false(zoid.isTerminal()); - - // check whether it can be split in space - if (!zoid.isSpaceSplitable()) { - // we need a time split - auto parts = zoid.splitTime(); - return core::sequential( - rec(deps.toCoreDependencies(),Params{parts.bottom,deps}), - rec(deps.toCoreDependencies(),Params{parts.top,deps}) - ); - } - - // let's do a space split - auto parts = zoid.splitSpace(); - - // schedule depending on the orientation - return (parts.opening) - ? core::sequential( - rec(deps.toCoreDependencies(),Params{parts.c,deps}), - core::parallel( - rec(deps.toCoreDependencies(),Params{parts.l,deps}), - rec(deps.toCoreDependencies(),Params{parts.r,deps}) - ) - ) - : core::sequential( - core::parallel( - rec(deps.toCoreDependencies(),Params{parts.l,deps}), - rec(deps.toCoreDependencies(),Params{parts.r,deps}) - ), - rec(deps.toCoreDependencies(),Params{parts.c,deps}) - ); - - - }, - [&](const Params& params, const auto&) { - // provide sequential alternative - params.zoid.forEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - } - ) - )(deps.toCoreDependencies(),Params{*this,deps}); - - } - - template - core::treeture pforEach(const EvenOd& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - // run the pforEach with no initial dependencies - return pforEach(ZoidDependencies(),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - } - - - /** - * The height of this zoid in temporal direction. 
- */ - std::size_t getHeight() const { - return std::size_t(t_end-t_begin); - } - - /** - * Compute the number of elements this volume is covering - * when being projected to the space domain. - */ - int getFootprint() const { - int size = 1; - int dt = getHeight(); - for(std::size_t i=0; i 4*getHeight(); - } - - // the result of a time split - struct TimeDecomposition { - Zoid top; - Zoid bottom; - }; - - /** - * Splits this zoid in two sub-zoids along the time dimension. The - * First component will be the bottom, the second the top. - */ - TimeDecomposition splitTime() const { - auto split = getHeight() / 2; - - Base mid = base; - - for(std::size_t i=0; imax_width) { - max_width = width; - max_dim = i; - } - } - - // the max dimension is the split dimensin - auto split_dim = max_dim; - - // check whether longest dimension can be split - assert(isSplitable(split_dim)); - - // create 3 fragments - SpaceDecomposition res { - *this, *this, *this, (slopes[split_dim] < 0) - }; - - // get the split point - auto center = (base.boundaries[split_dim].begin + base.boundaries[split_dim].end) / 2; - auto left = center; - auto right = center; - - if (slopes[split_dim] < 0) { - auto hight = getHeight(); - left -= hight; - right += hight; - } - - res.l.base.boundaries[split_dim].end = left; - res.c.base.boundaries[split_dim] = { left, right }; - res.r.base.boundaries[split_dim].begin = right; - - // invert direction of center piece - res.c.slopes[split_dim] *= -1; - - // return decomposition - return res; - } - - }; - - /** - * A utility class for enumerating the dependencies of a task in a - * n-dimensional top-level task graph. - */ - template - struct task_dependency_extractor { - - template - void operator()(const Body& body, const Args& ... args) { - task_dependency_extractor nested; - if (taskIdx & (1< - struct task_dependency_extractor { - - template - void operator()(const Body& body, const Args& ... 
args) { - if (taskIdx & 0x1) { - body(args...,taskIdx & ~0x1); - } else { - body(args...); - } - } - - }; - - - /** - * A utility class for enumerating the dependencies of a task in a - * n-dimensional top-level task graph. - */ - template - struct task_dependency_enumerator { - - template - void operator()(const Body& body) { - for(std::size_t i=0;i<=Dims;i++) { - visit(body,i); - } - } - - template - void visit(const Body& body,std::size_t numBits) { - task_dependency_enumerator().visit(body,numBits); - if ((std::size_t)(utils::countOnes(taskIdx))==numBits) { - task_dependency_extractor()(body,taskIdx); - } - } - - }; - - template - struct task_dependency_enumerator { - - template - void visit(const Body& body,std::size_t numBits) { - if (numBits == 0) { - task_dependency_extractor<0,Dims-1>()(body,0); - } - } - - }; - - /** - * A utility to statically enumerate the tasks and dependencies for - * the top-level zoid task decomposition scheme. On the top level, - * the set of tasks and its dependencies are isomorph to the vertices - * and edges in a n-dimensional hyper cube. This utility is enumerating - * those edges, as well as listing its predecessors according to the - * sub-set relation. 
- */ - template - struct task_graph_enumerator { - - template - void operator()(const Body& body) { - task_dependency_enumerator enumerator; - enumerator(body); - } - - }; - - - template - class ExecutionPlan { - - using zoid_type = Zoid; - - // the execution plan of one layer -- represented as an embedded hyper-cube - using layer_plan = std::array; - - // the list of execution plans of all layers - std::vector layers; - - public: - - template - void runSequential(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserver& evenObserver, const OddObserver& oddObserver, const Size& limits) const { - const std::size_t num_tasks = 1 << Dims; - - // fill a vector with the indices of the tasks - std::array order; - for(std::size_t i = 0; i - core::treeture runParallel(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserver& evenObserver, const OddObserver& oddObserver, const Size& limits) const { - - const std::size_t num_tasks = 1 << Dims; - - // start tasks with mutual dependencies - core::treeture last = core::done(); - for(const auto& cur : layers) { - - std::array,num_tasks> jobs; - - // walk through graph dependency graph - enumTaskGraph([&](std::size_t idx, const auto& ... deps){ - - // special case handling for first task (has to depend on previous task) - if (idx == 0) { - // create first task - jobs[idx] = (last.isDone()) - ? 
cur[idx].pforEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits) - : cur[idx].pforEach(ZoidDependencies(last),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - return; - } - - // create this task with corresponding dependencies - jobs[idx] = cur[idx].pforEach(ZoidDependencies(jobs[deps]...),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - - }); - - // update last - last = std::move(jobs.back()); - } - - // return handle to last task - return last; - - } - - static ExecutionPlan create(const Base& base, std::size_t steps) { - - // get size of structure - auto size = base.extend(); - - // the the smallest width (this is the limiting factor for the height) - auto width = base.getMinimumWidth(); - - // get the height of the largest zoids, thus the height of each layer - auto height = width/2; - - // compute base area partitioning - struct split { - typename Base::range left; - typename Base::range right; - }; - std::array splits; - for(std::size_t i = 0; i(t0+height,steps); - - // create the list of zoids in this step - plan.layers.emplace_back(); - layer_plan& zoids = plan.layers.back(); - - // generate binary patterns from 0 to 2^dims - 1 - for(size_t i=0; i < (1< curBase = base; - Slopes slopes; - - // move base to center on field, edge, or corner - for(size_t j=0; j(curBase, slopes, t0, t1); - } - - } - - // build the final result - return plan; - } - - template - static void enumTaskGraph(const Body& body) { - task_graph_enumerator()(body); - } - - private: - - static std::size_t getNumBitsSet(std::size_t mask) { - return utils::countOnes((unsigned)mask); - } - - }; - - - template - struct container_info_base { - constexpr static const unsigned dimensions = Dims; - using base_type = Base; - }; - - - template - struct container_info : public container_info_base<1> { - using index_type = detail::index_type; - }; - - template - struct container_info> : public container_info_base { - using index_type = 
utils::Vector; - }; - - template - struct container_info> : public container_info_base { - using index_type = utils::Vector; - }; - - template - struct coordinate_converter { - auto& operator()(const Coordinate<1>& pos) { - return pos[0]; - } - }; - - template - struct coordinate_converter> { - auto& operator()(const Coordinate& pos) { - return pos; - } - }; - - template - struct coordinate_converter> { - auto& operator()(const Coordinate& pos) { - return pos; - } - }; - - } - - struct sequential_recursive { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... observers) { - - using namespace detail; - - const unsigned dims = container_info::dimensions; - using base_t = typename container_info::base_type; - - // iterative implementation - Container b(a.size()); - - // TODO: - // - switch internally to cache-oblivious access pattern (optional) - - // get size of structure - base_t base = base_t::full(a.size()); - auto size = base.extend(); - - // wrap update function into zoid-interface adapter - auto even = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = inner(t,p,a); - }; - - auto odd = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = inner(t,p,b); - }; - - auto evenBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = boundary(t,p,a); - }; - - auto oddBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = boundary(t,p,b); - }; - - auto evenObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - 
[&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,b[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - auto oddObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,a[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - // get the execution plan - auto exec_plan = ExecutionPlan::create(base,steps); - - // process the execution plan - exec_plan.runSequential(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,size); - - - // make sure the result is in the a copy - if (steps % 2) { - std::swap(a,b); - } - - // done - return {}; - } - }; - - - struct parallel_recursive { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... 
observers) { - - using namespace detail; - - const unsigned dims = container_info::dimensions; - using base_t = typename container_info::base_type; - - // iterative implementation - Container b(a.size()); - - // TODO: - // - switch internally to cache-oblivious access pattern (optional) - // - make parallel with fine-grained dependencies - - // get size of structure - base_t base = base_t::full(a.size()); - auto size = base.extend(); - - // wrap update function into zoid-interface adapter - auto even = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = inner(t,p,a); - }; - - auto odd = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = inner(t,p,b); - }; - - auto evenBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = boundary(t,p,a); - }; - - auto oddBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = boundary(t,p,b); - }; - - auto evenObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,b[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... 
}; - }; - - auto oddObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,a[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - // get the execution plan - auto exec_plan = ExecutionPlan::create(base,steps); - - // process the execution plan - exec_plan.runParallel(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,size).wait(); - - // make sure the result is in the a copy - if (steps % 2) { - std::swap(a,b); - } - - // done - return {}; - } - }; - - } // end namespace implementation - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/vcycle.h b/vendor/allscale/api/user/algorithm/vcycle.h deleted file mode 100644 index 95af0c925..000000000 --- a/vendor/allscale/api/user/algorithm/vcycle.h +++ /dev/null @@ -1,272 +0,0 @@ -#pragma once - -#include "allscale/api/user/algorithm/async.h" -#include "allscale/api/core/treeture.h" -#include "allscale/api/user/algorithm/internal/operation_reference.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - /** - * The VCycle utility enalbes the generic description of a arbitrarily deep V-cycle computation. 
- * Each stage is realized by a different specialization of the VCycleStage class, conducting the - * necessary reduction, computation, and prolongation steps to assemble the full VCycle. - */ - - class vcycle_reference; - - /** - * A generic v-cycle implementation enabling the creation of a vcycle solver by providing - * an implementation of a v-cycle stage body. - */ - template class StageBody, typename Mesh> - class VCycle; - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - /** - * An entity to reference the full range of a scan. This token - * can not be copied and will wait for the completion of the scan upon destruction. - */ - class vcycle_reference : public internal::operation_reference { - - public: - - // inherit all constructors - using operation_reference::operation_reference; - - }; - - - namespace detail { - - - template< - typename Mesh, - template class StageBody, - unsigned Level, // the level covered by this instance - unsigned NumLevels // total number of levels - > - class VCycleStage { - - using stage_body = StageBody; - - using nested_stage_type = VCycleStage; - - using stage_body_type = StageBody; - - stage_body_type body; - - nested_stage_type nested; - - public: - - VCycleStage(const Mesh& mesh) - : body(mesh), nested(mesh) {} - - /** - * A function processing a single V-cycle starting at the current level. 
- */ - void run() { - // one iteration of the V cycle (actually very simple) - up(); // going up (fine to coarse) - down(); // going down (coarse to fine) - } - - void up() { - // forward call to nested - nested.up(); - body.restrictFrom(nested.getBody()); - body.computeFineToCoarse(); - } - - void down() { - body.prolongateTo(nested.getBody()); - nested.getBody().computeCoarseToFine(); - nested.down(); - } - - stage_body_type& getBody() { - return body; - } - - void prolongateFrom(const StageBody& parentBody) { - body.prolongateFrom(parentBody); - } - - template - typename std::enable_if&>::type - getStageBody() const { - return body; - } - - template - typename std::enable_if&>::type - getStageBody() const { - return nested.template getStageBody(); - } - - template - typename std::enable_if&>::type - getStageBody() { - return body; - } - - template - typename std::enable_if&>::type - getStageBody() { - return nested.template getStageBody(); - } - - template - void forEachStage(const Op& op) { - op(Level, this->body); - nested.forEachStage(op); - } - - template - void forEachStage(const Op& op) const { - op(Level, this->body); - nested.forEachStage(op); - } - - }; - - - template< - typename Mesh, - template class StageBody, - unsigned NumLevels // total number of levels - > - class VCycleStage { - - using stage_body_type = StageBody; - - stage_body_type body; - - public: - - VCycleStage(const Mesh& mesh) - : body(mesh) {} - - /** - * A function processing a single V-cycle starting at the current level. 
- */ - void run() { - // one iteration of the V cycle (actually very simple) - up(); // going up (fine to coarse) - down(); // going down (coarse to fine) - } - - void up() { - // just compute on this level - body.computeFineToCoarse(); - } - - void down() { - // nothing to do - } - - stage_body_type& getBody() { - return body; - } - - void prolongateTo(const StageBody& parentBody) { - body.prolongateTo(parentBody); - } - - template - typename std::enable_if<0==Lvl,const StageBody&>::type - getStageBody() const { - return body; - } - - template - typename std::enable_if<0==Lvl,StageBody&>::type - getStageBody() { - return body; - } - - template - void forEachStage(const Op& op) { - op(0, this->body); - } - - template - void forEachStage(const Op& op) const { - op(0, this->body); - } - - }; - - - } - - - - template< - template class StageBody, - typename Mesh - > - class VCycle { - - using top_stage_type = detail::VCycleStage; - - top_stage_type topStage; - - public: - - using mesh_type = Mesh; - - const mesh_type& mesh; - - VCycle(const mesh_type& mesh) : topStage(mesh), mesh(mesh) {} - - vcycle_reference run(std::size_t numCycles = 1) { - return async([&, numCycles]() { - // run the given number of cycles - for(std::size_t i = 0; i - const StageBody& getStageBody() const { - return topStage.template getStageBody(); - } - - template - StageBody& getStageBody() { - return topStage.template getStageBody(); - } - - template - void forEachStage(const Op& op) { - topStage.forEachStage(op); - } - - template - void forEachStage(const Op& op) const { - topStage.forEachStage(op); - } - - }; - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/arithmetic.h b/vendor/allscale/api/user/arithmetic.h deleted file mode 100644 index 4f812586e..000000000 --- a/vendor/allscale/api/user/arithmetic.h +++ /dev/null @@ -1,45 +0,0 @@ - -#pragma once - -#include -#include - -#include 
"allscale/api/core/treeture.h" - - -namespace allscale { -namespace api { -namespace user { - - // --- specific aggregators --- - - - template() + std::declval())> - auto add(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a + b; }); - } - - template() - std::declval())> - auto sub(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a - b; }); - } - - template() * std::declval())> - auto mul(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a * b; }); - } - - - template(),std::declval()))> - auto min(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return std::min(a,b); }); - } - - template(),std::declval()))> - auto max(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return std::max(a,b); }); - } - -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/adaptive_grid.h b/vendor/allscale/api/user/data/adaptive_grid.h deleted file mode 100644 index 802aabe35..000000000 --- a/vendor/allscale/api/user/data/adaptive_grid.h +++ /dev/null @@ -1,577 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/api/core/data.h" - -#include "allscale/api/user/data/grid.h" -#include "allscale/api/user/algorithm/pfor.h" - -#include "allscale/utils/assert.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/static_grid.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - template - using AdaptiveGridSharedData = GridSharedData; - - template - using AdaptiveGridPoint = GridPoint; - - template - using AdaptiveGridBox = GridBox; - - 
template - using AdaptiveGridRegion = GridRegion; - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - template - struct layer; - - template - struct layers { - enum { - num_layers = sizeof...(Layers) - }; - }; - - // structures for each Cell configuration and number of layers for nesting - template - struct CellConfig { - - enum { - dims = Dims, - num_layers = Layers::num_layers - }; - }; - - enum Direction { - Up, Down, Left, Right - }; - - namespace detail { - - template - struct size { - typedef size next; - }; - - template - struct make_size { - typedef typename make_size::type::next type; - }; - - template<> - struct make_size<0> { - typedef size<> type; - }; - - template - std::vector getBoundary(const Direction& dir, const utils::StaticGrid& data) { // returns vector of boundary data in each direction - int size[] = { Sizes... }; - int xSize = size[0]; - int ySize = size[1]; - switch(dir) { - case Up: { // returns data from top strip of domain to neighbor - std::vector res(xSize); - for(int i = 0; i < xSize; i++) - res[i] = data[{ i, ySize - 1}]; - return res; - } - case Down: { // returns data from bottom strip of domain to neighbor - std::vector res(xSize); - for(int i = 0; i < xSize; i++) - res[i] = data[{ i, 0 }]; - return res; - } - case Left: { - std::vector res(ySize); - for(int i = 0; i < ySize; i++) { - res[i] = data[{ 0, i }]; - } - return res; - } - case Right: { - std::vector res(ySize); - for(int i = 0; i < ySize; i++) - res[i] = data[{ xSize - 1, i }]; - return res; - } - } - return std::vector(); - } - - template - void setBoundary(const Direction& dir, utils::StaticGrid& data, const std::vector& boundary) { - int size[] = { Sizes... 
}; - int xSize = size[0]; - int ySize = size[1]; - - switch(dir) { - case Up: { - assert_eq(boundary.size(), (size_t)xSize); - for(int i = 0; i < xSize; i++) - data[{ i, ySize - 1 }] = boundary[i]; - return; - } - case Down: { - assert_eq(boundary.size(), (size_t)xSize); - for(int i = 0; i < xSize; i++) - data[{ i, 0 }] = boundary[i]; - return; - } - case Left: { - assert_eq(boundary.size(), (size_t)ySize); - for(int i = 0; i < ySize; i++) - data[{ 0, i }] = boundary[i]; - return; - } - case Right: { - assert_eq(boundary.size(), (size_t)ySize); - for(int i = 0; i < ySize; i++) - data[{ xSize - 1, i }] = boundary[i]; - return; - } - } - } - - - } // end namespace detail - - - template - struct GridLayerData; - - template - struct GridLayerData, layers, Rest...>> { - static_assert(sizeof...(Sizes) == sizeof...(Dims), "layer dimension has to be equal to the grid dimension."); - - using data_type = utils::StaticGrid; - using nested_type = GridLayerData, layers>; - using addr_type = allscale::utils::Vector; - - enum { layer_number = sizeof...(Rest) + 1 }; - - // the values to be stored on this layer - data_type data; - - // the nested layers - nested_type nested; - - unsigned getLayerNumber() const { return layer_number; } - - template - typename std::enable_if::type getLayer() { - return data; - } - - template - typename std::enable_if::type getLayer() const { - return data; - } - - template - typename std::enable_if < Layer())>::type getLayer() { - return nested.template getLayer(); - } - - template - typename std::enable_if < Layer(nested).template getLayer())>::type getLayer() const { - return nested.template getLayer(); - } - - T& getData(unsigned layer, const addr_type& addr) { - if(layer == getLayerNumber()) { - return data[addr]; - } - return nested.getData(layer, addr); - } - - const T& getData(unsigned layer, const addr_type& addr) const { - if(layer == getLayerNumber()) { - return data[addr]; - } - return nested.getData(layer, addr); - } - - 
allscale::utils::Vector getLayerSize(unsigned layer) const { - if(layer == getLayerNumber()) { - return data.size(); - } - return nested.getLayerSize(layer); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) const { - if(layer == getLayerNumber()) { - // apply it to this value - data.forEach(op); - } else { - nested.forAllOnLayer(layer, op); - } - } - - template - void forAllOnLayer(unsigned layer, const Op& op) { - if(layer == getLayerNumber()) { - // apply it to this value - data.forEach(op); - } else { - nested.forAllOnLayer(layer, op); - } - } - - template - void refineFromLayer(unsigned layer, const Refiner& refiner) { - if(layer == getLayerNumber()) { - // iterate over cells on nested layer - api::user::algorithm::detail::forEach({0}, nested.data.size(), [&](const auto& index) -> void { - // using the index of a cell on nested layer, computes index covering cell on this layer - auto newIndex = utils::elementwiseDivision(index, utils::elementwiseDivision(nested.data.size(), data.size())); - // simply replicate data to cell on nested layer - nested.data[index] = refiner(data[newIndex]); - }); - } else { - nested.refineFromLayer(layer, refiner); - } - } - - template - void refineFromLayerGrid(unsigned layer, const Refiner& refiner) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - const auto& res = refiner(data[index]); - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const auto& i) { - nested.data[i] = res[i-indexer(index)]; - }); - }); - - } else { - nested.refineFromLayerGrid(layer, refiner); - 
} - } - - template - void coarsenToLayer(unsigned layer, const Coarsener& coarsener) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - T sum = T(); - // iterate over subset of cells on nested layer, to be projected to the current cell pointed to by index - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const auto& i) -> void { - sum += coarsener(nested.data[i]); - }); - // compute divisor for average - unsigned result = 1; - (void)std::initializer_list{ (result *= Dims, 0u)... }; - data[index] = sum / result; - }); - - } else { - nested.coarsenToLayer(layer, coarsener); - } - } - - template - void coarsenToLayerGrid(unsigned layer, const Coarsener& coarsener) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - utils::StaticGrid param; - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - // iterate over subset of cells on nested layer, to be projected to the current cell pointed to by index - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const decltype(index)& i) -> void { - param[i - indexer(index)] = nested.data[i]; - }); - data[index] = coarsener(param); - }); - - } else { - nested.coarsenToLayerGrid(layer, coarsener); - 
} - } - - std::vector getBoundary(unsigned layer, Direction dir) const { // returns vector of boundary data in each direction - if(layer == getLayerNumber()) { - return detail::getBoundary(dir, data); - } - return nested.getBoundary(layer, dir); - } - - void setBoundary(unsigned layer, Direction dir, const std::vector& boundary) { - if(layer == getLayerNumber()) { - detail::setBoundary(dir, data, boundary); - } else { - nested.setBoundary(layer, dir, boundary); - } - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - writer.write(nested); - } - - static GridLayerData load(utils::ArchiveReader& reader) { - auto data = std::move(reader.read()); - auto nested = std::move(reader.read()); - return { data, nested }; - } - - }; - - template - struct GridLayerData, layers<>> { - - using data_type = utils::StaticGrid; - using addr_type = allscale::utils::Vector; - - // the values to be stored on this last layer - data_type data; - - unsigned getLayerNumber() const { return 0; } - - template - typename std::enable_if::type getLayer() { - return data; - } - - template - typename std::enable_if::type getLayer() const { - return data; - } - - T& getData(unsigned layer, const addr_type& addr) { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data[addr]; - } - - const T& getData(unsigned layer, const addr_type& addr) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data[addr]; - } - - allscale::utils::Vector getLayerSize(unsigned layer) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data.size(); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - data.forEach(op); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) { - assert_eq(layer, 0) << "Error: 
trying to access layer " << layer << " --no such layer!"; - data.forEach(op); - } - - template - void refineFromLayer(unsigned layer, const Refiner&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void refineFromLayerGrid(unsigned layer, const Refiner&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void coarsenToLayer(unsigned layer, const Coarsener&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void coarsenToLayerGrid(unsigned layer, const Coarsener&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - std::vector getBoundary(__allscale_unused unsigned layer, Direction dir) const { - assert_eq(0, layer) << "No such layer"; - return detail::getBoundary(dir, data); - } - - void setBoundary(__allscale_unused unsigned layer, Direction dir, const std::vector& boundary) { - assert_eq(0, layer) << "No such layer"; - detail::setBoundary(dir, data, boundary); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - } - - static GridLayerData, layers<>> load(utils::ArchiveReader& reader) { - GridLayerData, layers<>> grid; - grid.data = std::move(reader.read()); - - return grid; - } - - }; - - - template - struct AdaptiveGridCell; - - - template - struct AdaptiveGridCell> { - using element_type = T; - using unit_size = typename detail::make_size::type; - using addr_type = allscale::utils::Vector; - - AdaptiveGridCell() = default; - AdaptiveGridCell(const AdaptiveGridCell& other) = delete; - AdaptiveGridCell(AdaptiveGridCell&& other) = default; - - // determines the active layer of this grid cell - unsigned active_layer = 0; - - // the data stored in - GridLayerData data; - - AdaptiveGridCell& operator=(const AdaptiveGridCell& other) { - if(this == &other) return *this; - active_layer = other.active_layer; - data = other.data; - 
return *this; - } - - AdaptiveGridCell& operator=(const T& value) { - // update all active cells - data.forAllOnLayer(active_layer,[&](T& cur){ - cur = value; - }); - return *this; - } - - T& operator[](const addr_type& addr) { - return data.getData(active_layer, addr); - } - - const T& operator[](const addr_type& addr) const { - return data.getData(active_layer, addr); - } - - allscale::utils::Vector getActiveLayerSize() const { - return data.getLayerSize(active_layer); - } - - void setActiveLayer(unsigned level) { - active_layer = level; - } - - unsigned getActiveLayer() const { - return active_layer; - } - - template - auto getLayer() -> decltype(data.template getLayer())& { - return data.template getLayer(); - } - - template - auto getLayer() const -> const decltype(data.template getLayer())& { - return data.template getLayer(); - } - - template - void forAllActiveNodes(const Op& op) const { - data.forAllOnLayer(active_layer, op); - } - - template - void forAllActiveNodes(const Op& op) { - data.forAllOnLayer(active_layer, op); - } - - template - void refine(const Refiner& refiner) { - assert_gt(active_layer, 0) << "Cannot refine any further"; - data.refineFromLayer(active_layer, refiner); - active_layer--; - } - - template - void refineGrid(const Refiner& refiner) { - assert_gt(active_layer, 0) << "Cannot refine any further"; - data.refineFromLayerGrid(active_layer, refiner); - active_layer--; - } - - template - void coarsen(const Coarsener& coarsener) { - assert_gt(Layers::num_layers, active_layer) << "Cannot coarsen any further"; - active_layer++; - data.coarsenToLayer(active_layer, coarsener); - } - - template - void coarsenGrid(const Coarsener& coarsener) { - assert_gt(Layers::num_layers, active_layer) << "Cannot coarsen any further"; - active_layer++; - data.coarsenToLayerGrid(active_layer, coarsener); - } - - std::vector getBoundary(Direction dir) const { - return data.getBoundary(active_layer, dir); - } - - void setBoundary(Direction dir, const 
std::vector& boundary) { - data.setBoundary(active_layer, dir, boundary); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(active_layer); - writer.write(data); - } - - static AdaptiveGridCell load(utils::ArchiveReader& reader) { - AdaptiveGridCell cell; - cell.active_layer = std::move(reader.read()); - cell.data = reader.read>(); - return cell; - } - - }; - - template - using AdaptiveGridFragment = GridFragment, CellConfig::dims>; - - template - using AdaptiveGrid = Grid, CellConfig::dims>; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/grid.h b/vendor/allscale/api/user/data/grid.h deleted file mode 100644 index 06c052bc7..000000000 --- a/vendor/allscale/api/user/data/grid.h +++ /dev/null @@ -1,623 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/join.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - using coordinate_type = std::int64_t; - - template - using GridPoint = utils::Vector; - - template - class GridBox; - - template - class GridRegion; - - template - class GridFragment; - - template - class Grid; - - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - namespace detail { - - template - struct difference_computer { - - template - void collectDifferences(const GridBox& a, const GridBox& b, GridBox& cur, std::vector>& res) { - std::size_t i = I-1; - - // if b is within a - if (a.min[i] <= b.min[i] && b.max[i] <= a.max[i]) { - - // cover left part - cur.min[i] = 
a.min[i]; cur.max[i] = b.min[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover center part - cur.min[i] = b.min[i]; cur.max[i] = b.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.max[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // if a is within b - } else if (b.min[i] <= a.min[i] && a.max[i] <= b.max[i]) { - - // cover inner part - cur.min[i] = a.min[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // if a is on the left - } else if (a.min[i] <= b.min[i]) { - - // cover left part - cur.min[i] = a.min[i]; cur.max[i] = b.min[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.min[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // otherwise a is on the right - } else { - - // cover left part - cur.min[i] = a.min[i]; cur.max[i] = b.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.max[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - } - - } - - }; - - template<> - struct difference_computer<0> { - - template - void collectDifferences(const GridBox&, const GridBox& b, GridBox& cur, std::vector>& res) { - if(!b.covers(cur) && !cur.empty()) res.push_back(cur); - } - }; - - template - struct box_fuser { - template - bool apply(std::vector>& boxes) { - - // try fuse I-th dimension - for(std::size_t i = 0; i& a = boxes[i]; - GridBox& b = boxes[j]; - if (GridBox::template areFusable(a,b)) { - - // fuse the boxes - GridBox f = GridBox::template fuse(a,b); - boxes.erase(boxes.begin() + j); - boxes[i] = f; - 
- // start over again - apply(boxes); - return true; - } - } - } - - // fuse smaller dimensions - if (box_fuser().apply(boxes)) { - // start over again - apply(boxes); - return true; - } - - // no more changes - return false; - } - }; - - template<> - struct box_fuser<0> { - template - bool apply(std::vector>&) { return false; } - }; - - template - struct line_scanner { - template - void apply(const GridBox& box, GridPoint& a, GridPoint& b, const Lambda& body) { - for(coordinate_type i = box.min[Dims-I]; i < box.max[Dims-I]; ++i ) { - a[Dims-I] = i; - b[Dims-I] = i; - line_scanner().template apply(box,a,b,body); - } - } - }; - - template<> - struct line_scanner<1> { - template - void apply(const GridBox& box, GridPoint& a, GridPoint& b, const Lambda& body) { - a[Dims-1] = box.min[Dims-1]; - b[Dims-1] = box.max[Dims-1]; - body(a,b); - } - }; - } - - - template - class GridBox { - - static_assert(Dims >= 1, "0-dimension Grids (=Scalars) not yet supported."); - - template - friend struct detail::difference_computer; - - template - friend struct detail::line_scanner; - - template - friend class GridRegion; - - using point_type = GridPoint; - - point_type min; - point_type max; - - public: - GridBox() {} - - GridBox(coordinate_type N) - : min(0), max(N) {} - - GridBox(coordinate_type A, coordinate_type B) - : min(A), max(B) {} - - GridBox(const point_type& N) - : min(0), max(N) {} - - GridBox(const point_type& A, const point_type& B) - : min(A), max(B) {} - - bool empty() const { - return !min.strictlyDominatedBy(max); - } - - std::size_t area() const { - std::size_t res = 1; - for(std::size_t i=0; i merge(const GridBox& a, const GridBox& b) { - - // handle empty sets - if (a.empty() && b.empty()) return std::vector(); - if (a.empty()) return std::vector({b}); - if (b.empty()) return std::vector({a}); - - // boxes are intersecting => we have to do some work - auto res = difference(a,b); - res.push_back(b); - return res; - } - - static GridBox intersect(const GridBox& a, 
const GridBox& b) { - // compute the intersection - GridBox res = a; - for(std::size_t i = 0; i difference(const GridBox& a, const GridBox& b) { - - // handle case where b covers whole a - if (b.covers(a)) return std::vector(); - - // check whether there is an actual intersection - if (!a.intersectsWith(b)) { - return std::vector({a}); - } - - // slice up every single dimension - GridBox cur; - std::vector res; - detail::difference_computer().collectDifferences(a,b,cur,res); - return res; - } - - static GridBox span(const GridBox& a, const GridBox& b) { - return GridBox( - allscale::utils::elementwiseMin(a.min,b.min), - allscale::utils::elementwiseMax(a.max,b.max) - ); - } - - template - void scanByLines(const Lambda& body) const { - if (empty()) return; - point_type a; - point_type b; - detail::line_scanner().template apply(*this,a,b,body); - } - - template - static bool areFusable(const GridBox& a, const GridBox& b) { - static_assert(D < Dims, "Can not fuse on non-existing dimension."); - if (a.min > b.min) return areFusable(b,a); - if (a.max[D] != b.min[D]) return false; - for(std::size_t i = 0; i - static GridBox fuse(const GridBox& a, const GridBox& b) { - assert_true(areFusable(a,b)); - if (a.min[D] > b.min[D]) return fuse(b,a); - GridBox res = a; - res.max[D] = b.max[D]; - return res; - } - - friend std::ostream& operator<<(std::ostream& out, const GridBox& box) { - return out << "[" << box.min << " - " << box.max << "]"; - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static GridBox load(utils::ArchiveReader& reader) { - auto min = reader.read(); - auto max = reader.read(); - return { min, max }; - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - writer.write(min); - writer.write(max); - } - - /** - * Added by psalz for CELERITY on 2018/03/19. 
- */ - const point_type& get_min() const { return min; } - const point_type& get_max() const { return max; } - - /** - * Added by psalz for CELERITY on 2020/07/13. - */ - point_type& get_min() { return min; } - point_type& get_max() { return max; } - - }; - - template - class GridRegion { - - static_assert(Dims > 0, "0-dimensional grids are not supported yet"); - - using point_type = GridPoint; - using box_type = GridBox; - - std::vector regions; - - public: - - GridRegion() {} - - GridRegion(coordinate_type N) - : regions({box_type(N)}) { - if (0 >= N) regions.clear(); - } - - GridRegion(const point_type& size) - : regions({box_type(0,size)}) { - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const point_type& min, const point_type& max) - : regions({box_type(min,max)}) { - assert_true(min.dominatedBy(max)); - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const box_type& box) - : regions({box}) { - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const GridRegion&) = default; - GridRegion(GridRegion&&) = default; - - GridRegion& operator=(const GridRegion&) = default; - GridRegion& operator=(GridRegion&&) = default; - - static GridRegion single(const point_type& p) { - return GridRegion(p,p+point_type(1)); - } - - box_type boundingBox() const { - // handle empty region - if (regions.empty()) return box_type(0); - - // if there is a single element - if (regions.size() == 1u) return regions.front(); - - // compute the bounding box - box_type res = regions.front(); - for(const box_type& cur : regions) { - res.min = utils::elementwiseMin(res.min, cur.min); - res.max = utils::elementwiseMax(res.max, cur.max); - } - return res; - } - - bool operator==(const GridRegion& other) const { - return difference(*this,other).empty() && other.difference(other,*this).empty(); - } - - bool operator!=(const GridRegion& other) const { - return regions != other.regions; - } - - bool empty() const { - return regions.empty(); - } - - std::size_t 
area() const { - std::size_t res = 0; - for(const auto& cur : regions) { - res += cur.area(); - } - return res; - } - - static GridRegion merge(const GridRegion& a, const GridRegion& b) { - - // if both sets are empty => done - if(a.empty() && b.empty()) return a; - - // build result - GridRegion res = a; - - // combine regions - for(const auto& cur : difference(b,a).regions) { - res.regions.push_back(cur); - } - - // compress result - res.compress(); - - // done - return res; - } - - template - static GridRegion merge(const GridRegion& a, const GridRegion& b, const Rest& ... rest) { - return merge(merge(a,b),rest...); - } - - static GridRegion intersect(const GridRegion& a, const GridRegion& b) { - - // if one of the sets is empty => done - if(a.empty()) return a; - if(b.empty()) return b; - - // build result - GridRegion res; - - // combine regions - for(const auto& curA : a.regions) { - for(const auto& curB : b.regions) { - box_type diff = box_type::intersect(curA,curB); - if (!diff.empty()) { - res.regions.push_back(diff); - } - } - } - - // compress result - res.compress(); - - // done - return res; - } - - static GridRegion difference(const GridRegion& a, const GridRegion& b) { - - // handle empty sets - if(a.empty() || b.empty()) return a; - - - // build result - GridRegion res = a; - - // combine regions - for(const auto& curB : b.regions) { - std::vector next; - for(const auto& curA : res.regions) { - for(const auto& n : box_type::difference(curA,curB)) { - next.push_back(n); - } - } - res.regions.swap(next); - } - - // compress result - res.compress(); - - // done - return res; - } - - static GridRegion span(const GridRegion& a, const GridRegion& b) { - GridRegion res; - for(const auto& ba : a.regions) { - for(const auto& bb : b.regions) { - res = merge(res,GridRegion(box_type::span(ba,bb))); - } - } - return res; - } - - /** - * Scans the covered range, line by line. 
- */ - template - void scanByLines(const Lambda& body) const { - for(const auto& cur : regions) { - cur.scanByLines(body); - } - } - - /** - * Scan the covered range, point by point. - */ - template - void scan(const Lambda& body) const { - scanByLines([&](point_type a, const point_type& b) { - for(; a[Dims-1] - void scanByBoxes(const Lambda& f) const { - for(const auto& cur : regions) { - f(cur); - } - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static GridRegion load(utils::ArchiveReader& reader) { - // start with an empty region - GridRegion res; - - // read the box entries - res.regions = std::move(reader.read>()); - - // done - return res; - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - // just save the regions - writer.write(regions); - } - - friend std::ostream& operator<<(std::ostream& out, const GridRegion& region) { - return out << "{" << utils::join(",",region.regions) << "}"; - } - - private: - - void compress() { - // try to fuse boxes - detail::box_fuser().apply(regions); - } - - }; - - - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/user/data/map.h b/vendor/allscale/api/user/data/map.h deleted file mode 100644 index 11ea47afe..000000000 --- a/vendor/allscale/api/user/data/map.h +++ /dev/null @@ -1,335 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/api/core/data.h" -#include "allscale/utils/assert.h" - -#include "allscale/utils/printer/set.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - /** - * This header file defines an example data item covering a generic map of key-value pairs. 
- * The corresponding elements are: - * - a range type which corresponds to a set of keys - * - a fragment type capable of storing a share of the represented data - * - a facade type to be offered to the user as an interface - */ - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - - template - class SetRegion; - - template - class Map; - - template - class MapFragment; - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - /** - * The implementation of a set-region enumerating the covered elements explicitly. - * - * @tparam Element the type of element to describe an element within the set; the type - * has to be serializable - */ - template - class SetRegion { - - /** - * The elements covered by this region, explicitly enumerated. - */ - std::set elements; - - public: - - /** - * Adds a new element to this region. - */ - void add(const Element& e) { - elements.insert(e); - } - - /** - * Add multiple elements at once. - */ - template - void add(const Element& e, const Rest& ... rest) { - add(e); add(rest...); - } - - /** - * Terminal case for adding multiple elements. - */ - void add() { /* nothing */ } - - /** - * Obtains a list of all covered elements. - */ - const std::set& getElements() const { - return elements; - } - - // -- requirements imposed by the region concept -- - - /** - * Determines whether this region is empty. - */ - bool empty() const { - return elements.empty(); - } - - /** - * A comparison operator comparing regions on equality. - */ - bool operator==(const SetRegion& other) const { - return elements == other.elements; - } - - /** - * A comparison operator comparing regions for inequality. 
- */ - bool operator!=(const SetRegion& other) const { - return !(*this == other); - } - - /** - * An operator to merge two set regions. - */ - static SetRegion merge(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_union(a.elements.begin(),a.elements.end(),b.elements.begin(),b.elements.end(),std::inserter(res.elements, res.elements.begin())); - return res; - } - - /** - * An operator to intersect two set regions. - */ - static SetRegion intersect(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_intersection(a.elements.begin(), a.elements.end(), b.elements.begin(), b.elements.end(), std::inserter(res.elements, res.elements.begin())); - return res; - } - - /** - * An operator to compute the set-difference of two set regions. - */ - static SetRegion difference(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_difference(a.elements.begin(), a.elements.end(), b.elements.begin(), b.elements.end(), std::inserter(res.elements, res.elements.begin())); - return res; - } - - static SetRegion span(const SetRegion&, const SetRegion&) { - std::cout << "Unsupported operation: cannot computed span on set regions!"; - exit(1); - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static SetRegion load(utils::ArchiveReader&) { - assert_not_implemented(); - return SetRegion(); - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - // nothing so far - } - - /** - * Enables printing the elements of this set region. - */ - friend std::ostream& operator<<(std::ostream& out, const SetRegion& region) { - return out << region.elements; - } - }; - - /** - * An implementation of a fragment of a map-like data item. Each fragment - * stores a sub-section of the key-value pairs to be maintained by the overall map. 
- * - * @tparam Key the key type of the map to be stored - * @tparam Value the value type of the data to be associated to the key - */ - template - class MapFragment { - - /** - * The region this fragment is covering. - */ - SetRegion region; - - /** - * The data stored in this fragment. - */ - std::map data; - - // enables the facade to access internal data of this class. - friend class Map; - - public: - - using shared_data_type = core::no_shared_data; - using facade_type = Map; - using region_type = SetRegion; - - /** - * Create a new fragment covering the given region. - */ - MapFragment(const region_type& region) - : MapFragment(core::no_shared_data(),region) {} - - /** - * Create a new fragment covering the given region. - */ - MapFragment(const core::no_shared_data&, const region_type& region) : region(region) { - for(const auto& key : region.getElements()) { - data[key]; // initialize content by accessing elements - } - } - - /** - * Obtains a facade to this fragment to be forwarded by the data manager to the user code - * for interacting with this fragment. - */ - Map mask() { - return Map(*this); - } - - /** - * Obtains the range of data covered by this fragment. - */ - const region_type& getCoveredRegion() const { - return region; - } - - /** - * Resizes this fragment to provide enough space to store values for the given key-set. - */ - void resize(const region_type& keys) { - - // update the covered region - region = keys; - - // build up new data storage - std::map newData; - for(const auto& key : keys.getElements()) { - auto pos = data.find(key); - newData[key] = (pos != data.end()) ? pos->second : Value(); - } - - // swap data containers - data.swap(newData); - } - - /** - * Merges all the data from the given fragment into this fragment. 
- */ - void insert(const MapFragment& other, const region_type& fraction) { - assert_true(core::isSubRegion(fraction,region)) - << "Cannot insert non-sub-set region into this fragment."; - assert_true(core::isSubRegion(fraction,other.region)) - << "Cannot load non-sub-set region from other fragment."; - // move in data - for(const auto& cur : fraction.getElements()) { - auto pos = other.data.find(cur); - assert_true(pos != other.data.end()); - data[cur] = pos->second; - } - } - - void extract(utils::ArchiveWriter&, const region_type&) const { - assert_not_implemented(); - } - - void insert(utils::ArchiveReader&) { - assert_not_implemented(); - } - - }; - - - /** - * The map facade forming the actual data item to be managed by the - * runtime system. - * - * @tparam Key a key type, needs to be serializable - * @tparam Value a value type, needs to be serializable as well - */ - template - class Map : public core::data_item> { - - /** - * A pointer to an underlying fragment owned if used in an unmanaged state. - */ - std::unique_ptr> owned; - - /** - * A reference to the fragment instance operating on, referencing the owned fragment or an externally managed one. - */ - MapFragment& base; - - /** - * Enables fragments to use the private constructor below. - */ - friend class MapFragment; - - /** - * The constructor to be utilized by the fragment to create a facade for an existing fragment. - */ - Map(MapFragment& base) : base(base) {} - - public: - - /** - * Creates a new map covering the given region. - */ - Map(const SetRegion& keys) : owned(std::make_unique>(keys)), base(*owned) {} - - /** - * Provides read/write access to one of the values stored within this map. - */ - Value& operator[](const Key& key) { - auto pos = base.data.find(key); - assert_true(pos != base.data.end()) << "Access to invalid key: " << key << " - covered region: " << base.region; - return pos->second; - } - - /** - * Provides read access to one of the values stored within this map. 
- */ - const Value& operator[](const Key& key) const { - auto pos = base.data.find(key); - assert_true(pos != base.data.end()) << "Access to invalid key: " << key << " - covered region: " << base.region; - return pos->second; - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/mesh.h b/vendor/allscale/api/user/data/mesh.h deleted file mode 100644 index 14586b8f9..000000000 --- a/vendor/allscale/api/user/data/mesh.h +++ /dev/null @@ -1,3444 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/bitmanipulation.h" -#include "allscale/utils/io_utils.h" -#include "allscale/utils/range.h" -#include "allscale/utils/raw_buffer.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/static_map.h" -#include "allscale/utils/table.h" -#include "allscale/utils/array_utils.h" -#include "allscale/utils/tuple_utils.h" - -#include "allscale/utils/printer/vectors.h" - -#include "allscale/api/core/data.h" -#include "allscale/api/core/prec.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // -------------------------------------------------------------------- - // Declarations - // -------------------------------------------------------------------- - - - - // --- mesh type parameter constructs --- - - /** - * The base type of edges connecting nodes of kind A with nodes of kind B - * on the same level. - */ - template - struct edge { - using src_node_kind = A; - using trg_node_kind = B; - }; - - - /** - * The base type of edges connecting nodes of kind A with nodes of kind B - * on adjacent levels. - */ - template - struct hierarchy { - using parent_node_kind = A; - using child_node_kind = B; - }; - - /** - * The constructor for the list of node kinds to be included in a mesh structure. 
- */ - template - struct nodes { - enum { size = sizeof...(Nodes) }; - }; - - /** - * The constructor for the list of edge kinds to be included in a mesh structure. - */ - template - struct edges { - enum { size = sizeof...(Edges) }; - }; - - /** - * The constructor for the list of hierarchies to be included in a mesh structure. - */ - template - struct hierarchies { - enum { size = sizeof...(Hierarchies) }; - }; - - - // --- mesh type parameter constructs --- - - - /** - * The type used for addressing nodes within meshes. - */ - template - struct NodeRef; - - /** - * The type used for iterating over lists of nodes, e.g. a list of adjacent nodes. - */ - template - using NodeList = utils::range*>; - - - /** - * The type for representing the topological information of a hierarchical mesh. - */ - template< - typename NodeKinds, // < list of node types in each level - typename EdgeKinds, // < list of edge types connecting nodes within levels - typename Hierarchies = hierarchies<>, // < list of edge types connecting nodes between adjacent levels - unsigned Levels = 1, // < number of levels in the hierarchy - unsigned PartitionDepth = 0 // < number of partitioning level - > - class Mesh; - - - /** - * The type for associating (dynamic) information to nodes within a mesh. - */ - template< - typename NodeKind, // < the type of node to be annotated - typename ElementType, // < the type of value to be associated to each node on the given level - unsigned Level, // < the level of the mesh to be annotated - typename PartitionTree // < the type of the partition tree indexing the associated mesh - > - class MeshData; - - - /** - * A utility to construct meshes. 
- */ - template< - typename NodeKinds, // < list of node types in each level - typename EdgeKinds, // < list of edge types connecting nodes within levels - typename Hierarchies = hierarchies<>, // < list of edge types connecting nodes between adjacent levels - unsigned Levels = 1 // < number of levels in the hierarchy - > - class MeshBuilder; - - - // -- mesh attributes -- - - /** - * The base type for mesh property kinds. - */ - template - struct mesh_property { - using node_kind = NodeKind; - using value_type = ValueType; - }; - - /** - * A container for a collection of mesh properties. A mesh property is - * a value associated to a certain kind of node on each level of a mesh. - * The MeshProperties container allows multiple properties to be managed - * within a single, consistent entity. - * - * To create an instance, the factory function "createProperties" of - * the Mesh structure has to be utilized. - */ - template - class MeshProperties; - - - - // -------------------------------------------------------------------- - // Definitions - // -------------------------------------------------------------------- - - // The type used for indexing nodes in meshes - using node_index_t = uint64_t; - - // The type used for identifying nodes within meshes. - struct NodeID { - - node_index_t id; - - NodeID() = default; - - constexpr explicit NodeID(node_index_t id) : id(id) {} - - operator node_index_t() const { - return id; - } - - node_index_t getOrdinal() const { - return id; - } - - bool operator==(const NodeID& other) const { - return id == other.id; - } - - bool operator!=(const NodeID& other) const { - return id != other.id; - } - - bool operator<(const NodeID& other) const { - return id < other.id; - } - - friend std::ostream& operator<<(std::ostream& out, const NodeID& ref) { - return out << "n" << ref.id; - } - - }; - - /** - * The type used for addressing nodes within meshes. 
- */ - template - struct NodeRef : public NodeID { - - using node_kind = Kind; - - enum { level = Level }; - - NodeRef() = default; - - constexpr explicit NodeRef(node_index_t id) - : NodeID(id) {} - - constexpr explicit NodeRef(NodeID id) - : NodeID(id) {} - - }; - - - template - class NodeRange { - - NodeRef _begin; - - NodeRef _end; - - public: - - NodeRange(const NodeRef& a, const NodeRef& b) : _begin(a), _end(b) { - assert_le(_begin.id,_end.id); - } - - NodeRange() : _begin(), _end() {} - - NodeRef getBegin() const { - return _begin; - } - - NodeRef getEnd() const { - return _end; - } - - NodeRef operator[](std::size_t index) const { - return NodeRef(NodeID(_begin.id + (node_index_t)index)); - } - - std::size_t size() const { - return _end.id - _begin.id; - } - - - class const_iterator : public std::iterator> { - - node_index_t cur; - - public: - - const_iterator(NodeID pos) : cur(pos) {}; - - bool operator==(const const_iterator& other) const { - return cur == other.cur; - } - - bool operator!=(const const_iterator& other) const { - return !(*this == other); - } - - bool operator<(const const_iterator& other) const { - return cur < other.cur; - } - - bool operator<=(const const_iterator& other) const { - return cur <= other.cur; - } - - bool operator>=(const const_iterator& other) const { - return cur >= other.cur; - } - - bool operator>(const const_iterator& other) const { - return cur > other.cur; - } - - NodeRef operator*() const { - return NodeRef{cur}; - } - - const_iterator& operator++() { - ++cur; - return *this; - } - - const_iterator operator++(int) { - const_iterator res = *this; - ++cur; - return res; - } - - const_iterator& operator--() { - --cur; - return *this; - } - - const_iterator operator--(int) { - const_iterator res = *this; - --cur; - return res; - } - - const_iterator& operator+=(std::ptrdiff_t n) { - cur += n; - return *this; - } - - const_iterator& operator-=(std::ptrdiff_t n) { - cur -= n; - return *this; - } - - friend const_iterator 
operator+(const_iterator& iter, std::ptrdiff_t n) { - const_iterator res = iter; - res.cur += n; - return res; - - } - - friend const_iterator& operator+(std::ptrdiff_t n, const_iterator& iter) { - const_iterator res = iter; - res.cur += n; - return res; - } - - const_iterator operator-(std::ptrdiff_t n) { - const_iterator res = *this; - res.cur -= n; - return res; - } - - std::ptrdiff_t operator-(const_iterator& other) const { - return std::ptrdiff_t(cur - other.cur); - } - - NodeRef operator[](std::ptrdiff_t n) const { - return *(*this + n); - } - - }; - - const_iterator begin() const { - return const_iterator(_begin); - } - - const_iterator end() const { - return const_iterator(_end); - } - - template - void forAll(const Body& body) { - for(const auto& cur : *this) { - body(cur); - } - } - - friend std::ostream& operator<<(std::ostream& out, const NodeRange& range) { - return out << "[" << range._begin.id << "," << range._end.id << ")"; - } - - }; - - - namespace detail { - - template - struct is_nodes : public std::false_type {}; - - template - struct is_nodes> : public std::true_type {}; - - template - struct is_edges : public std::false_type {}; - - template - struct is_edges> : public std::true_type {}; - - template - struct is_hierarchies : public std::false_type {}; - - template - struct is_hierarchies> : public std::true_type {}; - - template - struct level { - enum { value = Level }; - }; - - - template - struct get_level; - - template - struct get_level> { - enum { value = L }; - }; - - template - struct get_level : public get_level {}; - template - struct get_level : public get_level {}; - template - struct get_level : public get_level {}; - - template - using plain_type = typename std::remove_cv::type>::type; - - - template - void sumPrefixes(utils::Table& list) { - Element counter = 0; - for(auto& cur : list) { - auto tmp = cur; - cur = counter; - counter += tmp; - } - } - - - template - class NodeSet { - - using LevelData = 
utils::StaticMap,std::size_t>; - - using DataStore = std::array; - - static_assert(std::is_trivial::value, "The implementation assumes that this type is trivial!"); - - DataStore data; - - public: - - NodeSet() { - for(auto& cur : data) cur = LevelData(0); - } - - NodeSet(const NodeSet&) = default; - NodeSet(NodeSet&& other) = default; - - NodeSet& operator=(const NodeSet&) =default; - NodeSet& operator=(NodeSet&&) =default; - - - // -- observers and mutators -- - - template - NodeRef create() { - auto& node_counter = getNodeCounter(); - return NodeRef(node_counter++); - } - - template - NodeRange create(std::size_t num) { - auto& node_counter = getNodeCounter(); - NodeRef begin((node_index_t)node_counter); - node_counter += num; - NodeRef end((node_index_t)node_counter); - return { begin, end }; - } - - template - std::size_t getNumNodes() const { - return getNodeCounter(); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // store the number of nodes - utils::write(out, data); - } - - static NodeSet load(std::istream& in) { - - // produce result - NodeSet res; - - // restore the number of nodes - res.data = utils::read(in); - - // done - return res; - } - - static NodeSet interpret(utils::RawBuffer& buffer) { - - // produce result - NodeSet res; - - // restore the number of nodes - res.data = buffer.consume(); - - // done - return res; - - } - - private: - - template - std::size_t& getNodeCounter() { - return data[Level].template get(); - } - - template - const std::size_t& getNodeCounter() const { - return data[Level].template get(); - } - }; - - - template - class EdgeSet { - - // -- the data stored per relation -- - class Relation { - - static_assert( - sizeof(NodeRef) == sizeof(NodeID), - "For this implementation to be correct node references have to be simple node IDs." 
- ); - - utils::Table forward_offsets; - utils::Table forward_targets; - - utils::Table backward_offsets; - utils::Table backward_targets; - - std::vector> edges; - - public: - - template - NodeList getSinks(const NodeRef& src) const { - using List = NodeList; - using TrgNodeRef = NodeRef; - assert_true(isClosed()) << "Accessing non-closed edge set!"; - if (src.id+1 >= forward_offsets.size() || forward_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&forward_targets[forward_offsets[src.id]]), - reinterpret_cast(&forward_targets[forward_offsets[src.id+1]]) - }; - } - - template - NodeList getSources(const NodeRef& src) const { - using List = NodeList; - using SrcNodeRef = NodeRef; - assert_true(isClosed()) << "Accessing non-closed edge set!"; - if (src.id+1 >= backward_offsets.size() || backward_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&backward_targets[backward_offsets[src.id]]), - reinterpret_cast(&backward_targets[backward_offsets[src.id+1]]) - }; - } - - void addEdge(NodeID from, NodeID to) { - edges.push_back({from,to}); - } - - bool isClosed() const { - return edges.empty(); - } - - void close() { - - // get maximum source and target - std::size_t maxSourceID = 0; - std::size_t maxTargetID = 0; - for(const auto& cur : edges) { - maxSourceID = std::max(maxSourceID,cur.first); - maxTargetID = std::max(maxTargetID,cur.second); - } - - // init forward / backward vectors - forward_offsets = utils::Table(maxSourceID + 2, 0); - forward_targets = utils::Table(edges.size()); - - backward_offsets = utils::Table(maxTargetID + 2,0); - backward_targets = utils::Table(edges.size()); - - // count number of sources / sinks - for(const auto& cur : edges) { - ++forward_offsets[cur.first]; - ++backward_offsets[cur.second]; - } - - // compute prefix sums - sumPrefixes(forward_offsets); - sumPrefixes(backward_offsets); - - // fill in targets - auto forward_pos = forward_offsets; - auto backward_pos = 
backward_offsets; - for(const auto& cur : edges) { - forward_targets[forward_pos[cur.first]++] = cur.second; - backward_targets[backward_pos[cur.second]++] = cur.first; - } - - // clear edges - edges.clear(); - - } - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // write forward edge data - forward_offsets.store(out); - forward_targets.store(out); - - // write backward edge data - backward_offsets.store(out); - backward_targets.store(out); - - } - - static Relation load(std::istream& in) { - - Relation res; - - // restore edge data - res.forward_offsets = utils::Table::load(in); - res.forward_targets = utils::Table::load(in); - - res.backward_offsets = utils::Table::load(in); - res.backward_targets = utils::Table::load(in); - - // done - return res; - } - - static Relation interpret(utils::RawBuffer& buffer) { - - Relation res; - - // restore edge data - res.forward_offsets = utils::Table::interpret(buffer); - res.forward_targets = utils::Table::interpret(buffer); - - res.backward_offsets = utils::Table::interpret(buffer); - res.backward_targets = utils::Table::interpret(buffer); - - // done - return res; - } - - }; - - using LevelData = utils::StaticMap,Relation>; - - using EdgeData = std::array; - - EdgeData data; - - public: - - EdgeSet() = default; - EdgeSet(const EdgeSet&) = default; - EdgeSet(EdgeSet&& other) = default; - - EdgeSet& operator=(const EdgeSet&) = delete; - EdgeSet& operator=(EdgeSet&&) = default; - - - template - void addEdge(const NodeRef& src, const NodeRef& trg) { - getEdgeRelation().addEdge(src,trg); - } - - void close() { - // for all levels - for(auto& level : data) { - // for all edge kinds - for(auto& rel : level) { - rel.close(); - } - } - } - - bool isClosed() const { - // for all levels - for(const auto& level : data) { - // for all edge kinds - for(const auto& rel : level) { - // check this instance - if (!rel.isClosed()) return false; - } - } - // all are done - 
return true; - } - - template - NodeList getSinks(const NodeRef& src) const { - return getEdgeRelation().template getSinks(src); - } - - template - NodeList getSources(const NodeRef& src) const { - return getEdgeRelation().template getSources(src); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // store each relation independently - for(const auto& level : data) { - for(const auto& rel : level) { - rel.store(out); - } - } - - } - - static EdgeSet load(std::istream& in) { - - EdgeSet res; - - // load each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::load(in); - } - } - - // done - return res; - } - - static EdgeSet interpret(utils::RawBuffer& buffer) { - - EdgeSet res; - - // interpret each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::interpret(buffer); - } - } - - // done - return res; - } - - private: - - template - Relation& getEdgeRelation() { - return data[Level].template get(); - } - - template - const Relation& getEdgeRelation() const { - return data[Level].template get(); - } - - }; - - - template - class HierarchySet { - - class Relation { - - // -- inefficient build structures -- - - std::vector> children; - - std::vector parents; - - // -- efficient simulation structures -- - - utils::Table parent_targets; - - utils::Table children_offsets; - utils::Table children_targets; - - public: - - void addChild(const NodeID& parent, const NodeID& child) { - // a constant for an unknown parent - static const NodeID unknownParent(std::numeric_limits::max()); - - assert_ne(parent,unknownParent) << "Unknown parent constant must not be used!"; - - // register child as a child of parent - if (parent >= children.size()) { - children.resize(parent + 1); - } - auto& list = children[parent]; - for(auto& cur : list) if (cur == child) return; - list.push_back(child); - - - 
// register parent of child - if (child >= parents.size()) { - parents.resize(child + 1,unknownParent); - } - auto& trg = parents[child]; - assert_true(trg == unknownParent || trg == parent) - << "Double-assignment of parent for child " << child << " and parent " << parent; - - // update parent - trg = parent; - } - - bool isClosed() const { - return children.empty(); - } - - void close() { - // a constant for an unknown parent - static const NodeID unknownParent(std::numeric_limits::max()); - - // get maximum index of parents - std::size_t maxParent = 0; - for(const auto& cur : parents) { - maxParent = std::max(maxParent,cur); - } - - // compute total number of parent-child links - std::size_t numParentChildLinks = 0; - for(const auto& cur : children) { - numParentChildLinks += cur.size(); - } - - // init forward / backward vectors - children_offsets = utils::Table(maxParent + 2, 0); - children_targets = utils::Table(numParentChildLinks); - - // init child offsets - std::size_t idx = 0; - std::size_t offset = 0; - for(const auto& cur : children) { - children_offsets[idx] = offset; - offset += cur.size(); - idx++; - if (idx > maxParent) break; - } - children_offsets[idx] = offset; - - // fill in targets - idx = 0; - for(const auto& cur : children) { - for(const auto& child : cur) { - children_targets[idx++] = child; - } - } - - // clear edges - children.clear(); - - // init parent target table - parent_targets = utils::Table(parents.size()); - for(std::size_t i=0; i - NodeList getChildren(const NodeRef& parent) const { - using List = NodeList; - using ChildNodeRef = NodeRef; - assert_true(isClosed()); - if (parent.id >= children_offsets.size()-1 || children_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&children_targets[children_offsets[parent.id]]), - reinterpret_cast(&children_targets[children_offsets[parent.id+1]]) - }; - } - - template - NodeRef getParent(const NodeRef& child) const { - using ParentNodeRef = NodeRef; - 
assert_true(isClosed()); - assert_lt(child.id,parent_targets.size()); - return ParentNodeRef(parent_targets[child.id]); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // write parents table - parent_targets.store(out); - - // write child lists - children_offsets.store(out); - children_targets.store(out); - } - - static Relation load(std::istream& in) { - - Relation res; - - // restore parents - res.parent_targets = utils::Table::load(in); - - res.children_offsets = utils::Table::load(in); - res.children_targets = utils::Table::load(in); - - // done - return res; - } - - static Relation interpret(utils::RawBuffer& buffer) { - - Relation res; - - // restore parents - res.parent_targets = utils::Table::interpret(buffer); - - res.children_offsets = utils::Table::interpret(buffer); - res.children_targets = utils::Table::interpret(buffer); - - // done - return res; - } - - }; - - using LevelData = utils::StaticMap,Relation>; - - using HierarchyData = std::array; - - HierarchyData data; - - public: - - template - void addChild(const NodeRef& parent, const NodeRef& child) { - getRelation().addChild(parent,child); - } - - void close() { - for(auto& level : data) { - for(auto& rel : level) { - rel.close(); - } - } - } - - bool isClosed() const { - for(const auto& level : data) { - for(const auto& rel : level) { - if (!rel.isClosed()) return false; - } - } - return true; - } - - template - NodeList getChildren(const NodeRef& parent) const { - return getRelation().template getChildren(parent); - } - - template - NodeRef getParent(const NodeRef& child) const { - return getRelation().template getParent(child); - } - - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // store each relation independently - for(const auto& level : data) { - for(const auto& rel : level) { - rel.store(out); - } - } - - } 
- - static HierarchySet load(std::istream& in) { - - HierarchySet res; - - // load each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::load(in); - } - } - - // done - return res; - } - - static HierarchySet interpret(utils::RawBuffer& buffer) { - - HierarchySet res; - - // interpret each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::interpret(buffer); - } - } - - // done - return res; - } - - private: - - template - Relation& getRelation() { - return data[Level].template get(); - } - - template - const Relation& getRelation() const { - return data[Level].template get(); - } - - }; - - - // -- utilities for enumerating level/kind combinations -- - - template - struct KindEnumerator; - - template - struct KindEnumerator { - template - void operator()(const Body& body) const { - body(First()); - KindEnumerator()(body); - } - }; - - template<> - struct KindEnumerator<> { - template - void operator()(const Body&) const {} - }; - - - template - struct LevelEnumerator { - template - void operator()(const Body& body) const { - body(level()); - LevelEnumerator()(body); - } - }; - - template<> - struct LevelEnumerator<0> { - template - void operator()(const Body& body) const { - body(level<0>()); - } - }; - - template - struct HierarchyLevelEnumerator { - template - void operator()(const Body& body) const { - body(level()); - HierarchyLevelEnumerator()(body); - } - }; - - template<> - struct HierarchyLevelEnumerator<1> { - template - void operator()(const Body& body) const { - body(level<1>()); - } - }; - - template<> - struct HierarchyLevelEnumerator<0> { - template - void operator()(const Body&) const {} - }; - - - // -- mesh topology store -- - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels - > - struct MeshTopologyData; - - template< - typename ... Nodes, - typename ... Edges, - typename ... 
Hierarchies, - unsigned Levels - > - struct MeshTopologyData,edges,hierarchies,Levels> { - - using NodeSetType = NodeSet; - using EdgeSetType = EdgeSet; - using HierarchySetType = HierarchySet; - - // the topological data of all the nodes, edges and hierarchy relations on all levels - NodeSetType nodeSets; - EdgeSetType edgeSets; - HierarchySetType hierarchySets; - - MeshTopologyData() = default; - MeshTopologyData(const MeshTopologyData&) = default; - MeshTopologyData(MeshTopologyData&& other) = default; - - MeshTopologyData& operator= (MeshTopologyData&& m) = default; - - template - void forAllNodeKinds(const Body& body) const { - LevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - void forAllEdgeKinds(const Body& body) const { - LevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - void forAllHierarchyKinds(const Body& body) const { - HierarchyLevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - std::size_t getNumNodes() const { - return nodeSets.template getNumNodes(); - } - - void close() { - edgeSets.close(); - hierarchySets.close(); - } - - bool isClosed() const { - return edgeSets.isClosed() && hierarchySets.isClosed(); - } - - // -- IO support -- - - void store(std::ostream& out) const { - nodeSets.store(out); - edgeSets.store(out); - hierarchySets.store(out); - } - - static MeshTopologyData load(std::istream& in) { - MeshTopologyData res; - res.nodeSets = NodeSetType::load(in); - res.edgeSets = EdgeSetType::load(in); - res.hierarchySets = HierarchySetType::load(in); - return std::move(res); - } - - static MeshTopologyData interpret(utils::RawBuffer& buffer) { - 
MeshTopologyData res; - res.nodeSets = NodeSetType::interpret(buffer); - res.edgeSets = EdgeSetType::interpret(buffer); - res.hierarchySets = HierarchySetType::interpret(buffer); - return std::move(res); - } - - }; - - /** - * A common basis class for sub-tree and sub-graph references, which are both based on paths - * within a tree. - */ - template - class PathRefBase { - - protected: - - using value_t = uint32_t; - - value_t path; - value_t mask; - - PathRefBase(value_t path, value_t mask) - : path(path), mask(mask) {} - - public: - - static Derived root() { - return { 0u , 0u }; - } - - value_t getPath() const { - return path; - } - - value_t getMask() const { - return mask; - } - - value_t getDepth() const { - if (PathRefBase::mask == 0) return 0; - return sizeof(PathRefBase::mask) * 8 - utils::countLeadingZeros(PathRefBase::mask); - } - - bool isRoot() const { - return PathRefBase::mask == 0; - } - - bool isLeftChild() const { - assert_false(isRoot()); - return !isRightChild(); - } - - bool isRightChild() const { - assert_false(isRoot()); - return PathRefBase::path & (1 << (getDepth()-1)); - } - - Derived getLeftChild() const { - assert_lt(getDepth(),sizeof(PathRefBase::path)*8); - Derived res = asDerived(); - res.PathRefBase::mask = res.PathRefBase::mask | (1 << getDepth()); - return res; - } - - Derived getRightChild() const { - Derived res = getLeftChild(); - res.PathRefBase::path = res.PathRefBase::path | (1 << getDepth()); - return res; - } - - bool operator==(const Derived& other) const { - // same mask and same valid bit part - return (PathRefBase::mask == other.PathRefBase::mask) && - ((PathRefBase::path & PathRefBase::mask) == (other.PathRefBase::path & other.PathRefBase::mask)); - } - - bool operator!=(const Derived& other) const { - return !(*this == other); - } - - bool operator<(const Derived& other) const { - - auto thisMask = PathRefBase::mask; - auto thatMask = other.PathRefBase::mask; - - auto thisPath = PathRefBase::path; - auto thatPath = 
other.PathRefBase::path; - - while(true) { - - // if they are the same, we are done - if (thisMask == thatMask && thisPath == thatPath) return false; - - // check last mask bit - auto thisMbit = thisMask & 0x1; - auto thatMbit = thatMask & 0x1; - - if (thisMbit < thatMbit) return true; - if (thisMbit > thatMbit) return false; - - auto thisPbit = thisMbit & thisPath; - auto thatPbit = thatMbit & thatPath; - - if (thisPbit < thatPbit) return true; - if (thisPbit > thatPbit) return false; - - thisMask >>= 1; - thatMask >>= 1; - thisPath >>= 1; - thatPath >>= 1; - } - } - - bool operator<=(const Derived& other) const { - return *this == other || *this < other; - } - - bool operator>=(const Derived& other) const { - return !(asDerived() < other); - } - - bool operator>(const Derived& other) const { - return !(*this <= other); - } - - bool covers(const Derived& other) const { - if (getDepth() > other.getDepth()) return false; - if (PathRefBase::mask != (PathRefBase::mask & other.PathRefBase::mask)) return false; - return (PathRefBase::mask & PathRefBase::path) == (PathRefBase::mask & other.PathRefBase::path); - } - - bool tryMerge(const Derived& other) { - - if (covers(other)) return true; - - if (other.covers(asDerived())) { - *this = other; - return true; - } - - // the masks need to be identical - auto thisMask = PathRefBase::mask; - auto thatMask = other.PathRefBase::mask; - if (thisMask != thatMask) return false; - - - // the valid portion of the paths must only differe in one bit - auto thisPath = PathRefBase::path; - auto thatPath = other.PathRefBase::path; - - auto thisValid = thisPath & thisMask; - auto thatValid = thatPath & thatMask; - - auto diff = thisValid ^ thatValid; - - // if there is more than 1 bit difference, there is nothing we can do - if (utils::countOnes(diff) != 1) return false; - - // ignore this one bit in the mask - PathRefBase::mask = PathRefBase::mask & (~diff); - - // done - return true; - } - - /** - * @return true if the intersection is 
not empty; - * in this case this instance has been updated to represent the intersection - * false if the intersection is empty, the object has not been altered - */ - bool tryIntersect(const Derived& other) { - - // if the other covers this, the intersection is empty - if (other.covers(asDerived())) return true; - - // if this one is the larger one, this one gets reduced to the smaller one - if (covers(other)) { - *this = other; - return true; - } - - // make sure common constraints are identical - auto filterMask = PathRefBase::mask & other.PathRefBase::mask; - auto thisFilter = PathRefBase::path & filterMask; - auto thatFilter = other.PathRefBase::path & filterMask; - if (thisFilter != thatFilter) return false; - - // unite (disjunction!) the constraints of both sides - PathRefBase::path = (PathRefBase::path & PathRefBase::mask) | (other.PathRefBase::path & other.PathRefBase::mask); - PathRefBase::mask = PathRefBase::mask | other.PathRefBase::mask; - return true; - } - - - - template - void visitComplement(const Body& body, unsigned depth = 0) const { - - // when we reached the depth of this reference, we are done - if (getDepth() == depth) return; - - auto bitMask = (1 << depth); - - // if at this depth there is no wild card - if (PathRefBase::mask & bitMask) { - - // invert bit at this position - Derived cpy = asDerived(); - cpy.PathRefBase::path ^= bitMask; - cpy.PathRefBase::mask = cpy.PathRefBase::mask & ((bitMask << 1) - 1); - - // this is an element of the complement - body(cpy); - - // continue path - visitComplement(body,depth+1); - - return; - } - - // follow both paths, do nothing here - Derived cpy = asDerived(); - cpy.PathRefBase::mask = PathRefBase::mask | bitMask; - - // follow the 0 path - cpy.PathRefBase::path = PathRefBase::path & ~bitMask; - cpy.template visitComplement(body,depth+1); - - // follow the 1 path - cpy.PathRefBase::path = PathRefBase::path | bitMask; - cpy.template visitComplement(body,depth+1); - - } - - std::vector 
getComplement() const { - std::vector res; - visitComplement([&](const Derived& cur){ - res.push_back(cur); - }); - return res; - } - - private: - - Derived& asDerived() { - return static_cast(*this); - } - - const Derived& asDerived() const { - return static_cast(*this); - } - - }; - - - /** - * A utility to address nodes in the partition tree. - */ - class SubTreeRef : public PathRefBase { - - using super = PathRefBase; - - friend super; - - friend class SubMeshRef; - - SubTreeRef(value_t path, value_t mask) - : super(path,mask) {} - - public: - - value_t getIndex() const { - // this is reversing the path 000ZYX to 1XYZ to get the usual - // order of nodes within a embedded tree - auto res = 1; - value_t cur = path; - for(unsigned i = 0; i>= 1; - } - return res; - } - - - SubTreeRef getParent() const { - assert_false(isRoot()); - SubTreeRef res = *this; - res.PathRefBase::mask = res.PathRefBase::mask & ~(1 << (getDepth()-1)); - return res; - } - - - template - void enumerate(const Body& body) { - - if (preOrder) body(*this); - - if (getDepth() < DepthLimit) { - getLeftChild().enumerate(body); - getRightChild().enumerate(body); - } - - if (!preOrder) body(*this); - - } - - - friend std::ostream& operator<<(std::ostream& out, const SubTreeRef& ref) { - out << "r"; - auto depth = ref.getDepth(); - for(value_t i = 0; i> i) % 2); - } - return out; - } - - }; - - - /** - * A reference to a continuously stored part of a mesh. 
- */ - class SubMeshRef : public PathRefBase { - - using super = PathRefBase; - - using value_t = uint32_t; - - friend super; - - SubMeshRef(value_t path, value_t mask) - : super(path,mask) {} - - public: - - SubMeshRef(const SubTreeRef& ref) - : super(ref.path, ref.mask) {} - - SubMeshRef getMasked(unsigned pos) const { - assert_lt(pos,getDepth()); - SubMeshRef res = *this; - res.super::mask = res.super::mask & ~(1< - void scan(const Body& body) const { - - // look for last 0 in mask - unsigned zeroPos = utils::countTrailingZeros(~super::mask); - if (zeroPos >= getDepth()) { - body(SubTreeRef(super::path,super::mask)); - return; - } - - // recursive - SubMeshRef copy = getUnmasked(zeroPos); - - // set bit to 0 - copy.super::path = copy.super::path & ~( 1 << zeroPos ); - copy.scan(body); - - // set bit to 1 - copy.super::path = copy.super::path | ( 1 << zeroPos ); - copy.scan(body); - } - - - template - void scan(const PartitionTree& ptree, const Body& body) const { - scan([&](const SubTreeRef& ref){ - ptree.template getNodeRange(ref).forAll(body); - }); - } - - - friend std::ostream& operator<<(std::ostream& out, const SubMeshRef& ref) { - out << "r"; - auto depth = ref.getDepth(); - for(value_t i = 0; i> i) % 2); - } else { - out << ".*"; - } - } - return out; - } - - }; - - /** - * A union of sub mesh references. 
- */ - class MeshRegion { - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - friend class PartitionTree; - - std::vector refs; - - MeshRegion(const SubMeshRef* begin, const SubMeshRef* end) - : refs(begin,end) {} - - public: - - MeshRegion() {} - - MeshRegion(const SubMeshRef& ref) { - refs.push_back(ref); - } - - MeshRegion(std::initializer_list meshRefs) : refs(meshRefs) { - restoreSet(); - compress(); - } - - MeshRegion(const std::vector& refs) : refs(refs) { - restoreSet(); - compress(); - } - - bool operator==(const MeshRegion& other) const { - return this == &other || refs == other.refs || (difference(*this,other).empty() && difference(other,*this).empty()); - } - - bool operator!=(const MeshRegion& other) const { - return !(*this == other); - } - - const std::vector& getSubMeshReferences() const { - return refs; - } - - bool empty() const { - return refs.empty(); - } - - bool covers(const SubMeshRef& ref) const { - // cheap: one is covering the given reference - // expensive: the union of this and the reference is the same as this - return std::any_of(refs.begin(),refs.end(),[&](const SubMeshRef& a) { - return a.covers(ref); - }) || (merge(*this,MeshRegion(ref)) == *this); - } - - bool operator<(const MeshRegion& other) const { - return refs < other.refs; - } - - static MeshRegion merge(const MeshRegion& a, const MeshRegion& b) { - MeshRegion res; - std::set_union( - a.refs.begin(), a.refs.end(), - b.refs.begin(), b.refs.end(), - std::back_inserter(res.refs) - ); - res.compress(); - return res; - } - - template - static MeshRegion merge(const MeshRegion& a, const MeshRegion& b, const Rest& ... 
rest) { - return merge(merge(a,b),rest...); - } - - static MeshRegion intersect(const MeshRegion& a, const MeshRegion& b) { - - MeshRegion res; - - // compute pairwise intersections - for(const auto& ra : a.refs) { - for(const auto& rb : b.refs) { - auto tmp = ra; - if (tmp.tryIntersect(rb)) { - res.refs.push_back(tmp); - } - } - } - - // restore set invariant - res.restoreSet(); - - // compress the set representation - res.compress(); - return res; - } - - static MeshRegion difference(const MeshRegion& a, const MeshRegion& b) { - return intersect(a,complement(b)); - } - - static MeshRegion span(const MeshRegion&, const MeshRegion&) { - std::cout << "Scan operation not yet implemented!"; - exit(1); - } - - MeshRegion complement() const { - - MeshRegion res = SubMeshRef::root(); - - // aggregate the complements of all entries - for(const auto& cur : refs) { - - // compute the complement of the current entry - MeshRegion tmp; - cur.visitComplement([&](const SubMeshRef& ref) { - tmp.refs.push_back(ref); - }); - - // restore invariant - tmp.restoreSet(); - tmp.compress(); - - // intersect current complement with running complement - res = intersect(res,tmp); - } - - // done - return res; - } - - static MeshRegion complement(const MeshRegion& region) { - return region.complement(); - } - - /** - * An operator to load an instance of this region from the given archive. - */ - static MeshRegion load(utils::ArchiveReader&) { - assert_not_implemented(); - return MeshRegion(); - } - - /** - * An operator to store an instance of this region into the given archive. 
- */ - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - // nothing so far - } - - template - void scan(const Body& body) const { - for(const auto& cur : refs) { - cur.scan(body); - } - } - - template - void scan(const PartitionTree& ptree, const Body& body) const { - for(const auto& cur : refs) { - cur.scan(ptree,body); - } - } - - - friend std::ostream& operator<<(std::ostream& out, const MeshRegion& reg) { - return out << reg.refs; - } - - private: - - void compress() { - - // check precondition - assert_true(std::is_sorted(refs.begin(),refs.end())); - - // Phase 1: remove redundant entries - removeCovered(); - - // Phase 2: collapse adjacent entries (iteratively) - while (collapseSiblings()) {} - } - - - bool removeCovered() { - - // see whether any change happend - bool changed = false; - for(std::size_t i = 0; i, - unsigned Levels = 1, - unsigned depth = 12 - > - class PartitionTree; - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned depth - > - class PartitionTree { - - static_assert(detail::is_nodes::value, - "First template argument of PartitionTree must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of PartitionTree must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of PartitionTree must be of type hierarchies<...>"); - - }; - - template< - typename ... Nodes, - typename ... Edges, - typename ... 
Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class PartitionTree,edges,hierarchies,Levels,PartitionDepth> { - - public: - - enum { depth = PartitionDepth }; - - private: - - // an internal construct to store node ranges - struct RangeStore { - NodeID begin; - NodeID end; - }; - - // an internal construct to store regions in open and - // closed structure - // - open: the region pointer is referencing the stored region - // - closed: the begin and end indices reference and interval of an externally maintained - // list of regions - struct RegionStore { - - // -- open -- - MeshRegion* region; // the ownership is managed by the enclosing tree - - // -- closed -- - std::size_t offset; - std::size_t length; - - RegionStore() - : region(nullptr), offset(0), length(0) {} - - MeshRegion toRegion(const SubMeshRef* references) const { - if (region) return *region; - auto start = references + offset; - auto end = start + length; - return MeshRegion(start,end); - } - - RegionStore& operator=(const MeshRegion& value) { - if (!region) region = new MeshRegion(); - *region = value; - return *this; - } - }; - - - static_assert(Levels > 0, "There must be at least one level!"); - - struct LevelInfo { - - utils::StaticMap,RangeStore> nodeRanges; - - utils::StaticMap,RegionStore> forwardClosure; - utils::StaticMap,RegionStore> backwardClosure; - - utils::StaticMap,RegionStore> parentClosure; - utils::StaticMap,RegionStore> childClosure; - - }; - - struct Node { - - std::array data; - - }; - - // some preconditions required for the implementation of this class to work - static_assert(std::is_trivially_copyable::value, "RangeStore should be trivially copyable!"); - static_assert(std::is_trivially_copyable::value, "RegionStore should be trivially copyable!"); - static_assert(std::is_trivially_copyable::value, "LevelInfo should be trivially copyable!" ); - static_assert(std::is_trivially_copyable::value, "Nodes should be trivially copyable!" 
); - static_assert(std::is_trivially_copyable::value, "SubMeshRefs should be trivially copyable!"); - - enum { num_elements = 1ul << (depth + 1) }; - - bool owned; - - Node* data; - - std::size_t numReferences; - - SubMeshRef* references; - - PartitionTree(Node* data, std::size_t numReferences, SubMeshRef* references) - : owned(false), data(data), numReferences(numReferences), references(references) { - assert_true(data); - assert_true(references); - } - - public: - - PartitionTree() : owned(true), data(new Node[num_elements]), numReferences(0), references(nullptr) {} - - ~PartitionTree() { - if (owned) { - delete [] data; - free(references); - } - } - - PartitionTree(const PartitionTree&) = delete; - - PartitionTree(PartitionTree&& other) - : owned(other.owned), - data(other.data), - numReferences(other.numReferences), - references(other.references) { - - // free other from ownership - other.owned = false; - other.data = nullptr; - other.references = nullptr; - } - - PartitionTree& operator=(const PartitionTree&) = delete; - - PartitionTree& operator=(PartitionTree&& other) { - assert_ne(this,&other); - - // swap content and ownership - std::swap(owned,other.owned); - numReferences = other.numReferences; - std::swap(data,other.data); - std::swap(references,other.references); - - // done - return *this; - } - - bool isClosed() const { - return references != nullptr; - } - - void close() { - // must not be closed for now - assert_false(isClosed()); - - // a utility to apply an operation on each mesh region - auto forEachMeshRegion = [&](const auto& op) { - for(std::size_t i=0; igetSubMeshReferences().size(); - }); - - // create reference buffer - references = static_cast(malloc(sizeof(SubMeshRef) * numReferences)); - if (!references) { - throw "Unable to allocate memory for managing references!"; - } - - // transfer ownership of SubMeshRefs to reference buffer - std::size_t offset = 0; - forEachMeshRegion([&](RegionStore& cur){ - - // check whether there is a region 
- if (!cur.region) { - cur.offset = 0; - cur.length = 0; - return; - } - - // close the region - const auto& refs = cur.region->getSubMeshReferences(); - cur.offset = offset; - cur.length = refs.size(); - for(auto& cur : refs) { - // placement new for this reference - new (&references[offset++]) SubMeshRef(cur); - } - - // delete old region - delete cur.region; - cur.region = nullptr; - }); - - // make sure counting and transferring covered the same number of references - assert_eq(numReferences, offset); - } - - template - NodeRange getNodeRange(const SubTreeRef& ref = SubTreeRef::root()) const { - assert_lt(ref.getIndex(),num_elements); - auto range = data[ref.getIndex()].data[Level].nodeRanges.template get(); - return { - NodeRef{ range.begin }, - NodeRef{ range.end } - }; - } - - template - void setNodeRange(const SubTreeRef& ref, const NodeRange& range) { - auto& locRange = getNode(ref).data[Level].nodeRanges.template get(); - locRange.begin = range.getBegin(); - locRange.end = range.getEnd(); - } - - template - MeshRegion getForwardClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].forwardClosure.template get().toRegion(references); - } - - template - void setForwardClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].forwardClosure.template get() = region; - } - - template - MeshRegion getBackwardClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].backwardClosure.template get().toRegion(references); - } - - template - void setBackwardClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].backwardClosure.template get() = region; - } - - template - MeshRegion getParentClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].parentClosure.template get().toRegion(references); - } - - template - void setParentClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].parentClosure.template get() = region; - } - - - 
template - MeshRegion getChildClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].childClosure.template get().toRegion(references); - } - - template - void setChildClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].childClosure.template get() = region; - } - - - template - void visitPreOrder(const Body& body) { - SubTreeRef::root().enumerate(body); - } - - template - void visitPostOrder(const Body& body) { - SubTreeRef::root().enumerate(body); - } - - // -- serialization support for network transferes -- - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - - static PartitionTree load(utils::ArchiveReader&) { - assert_not_implemented(); - return PartitionTree(); - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // start by writing out number of references - out.write(reinterpret_cast(&numReferences),sizeof(numReferences)); - - // continue with node information - out.write(reinterpret_cast(data),sizeof(Node)*num_elements); - - // and end with references - out.write(reinterpret_cast(references),sizeof(SubMeshRef)*numReferences); - - } - - static PartitionTree load(std::istream& in) { - - // create the resulting tree (owning all its data) - PartitionTree res; - - // read in number of references - in.read(reinterpret_cast(&res.numReferences),sizeof(res.numReferences)); - - // load nodes - in.read(reinterpret_cast(res.data),sizeof(Node)*num_elements); - - // load references - res.references = reinterpret_cast(malloc(sizeof(SubMeshRef)*res.numReferences)); - in.read(reinterpret_cast(res.references),sizeof(SubMeshRef)*res.numReferences); - - // done - return res; - } - - static PartitionTree interpret(utils::RawBuffer& raw) { - - // get size - std::size_t numReferences = raw.consume(); - - // get nodes - Node* nodes = raw.consumeArray(num_elements); - - // get references - SubMeshRef* references = raw.consumeArray(numReferences); - - // wrap up results - 
return PartitionTree(nodes,numReferences,references); - } - - - private: - - const Node& getNode(const SubTreeRef& ref) const { - assert_lt(ref.getIndex(),num_elements); - return data[ref.getIndex()]; - } - - Node& getNode(const SubTreeRef& ref) { - assert_lt(ref.getIndex(),num_elements); - return data[ref.getIndex()]; - } - - }; - - - class NaiveMeshPartitioner { - - public: - - template< - unsigned PartitionDepth, - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels - > - PartitionTree partition(const MeshTopologyData& data) const { - - // create empty partition tree - PartitionTree res; - - // set up node ranges for partitions - data.forAllNodeKinds([&](const auto& nodeKind, const auto& level) { - - // get node kind and level - using NodeKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness - using lvl = get_level; - - // set root node to cover the full range - auto num_nodes = data.template getNumNodes(); - res.template setNodeRange( - SubTreeRef::root(), - NodeRange( - NodeRef{ 0 }, - NodeRef{ NodeID((node_index_t)num_nodes) } - ) - ); - - // recursively sub-divide ranges - res.visitPreOrder([&](const SubTreeRef& ref) { - - if (ref.isRoot()) return; - - // get the range of the parent - auto range = res.template getNodeRange(ref.getParent()); - - // extract begin / end - auto begin = range.getBegin(); - auto end = range.getEnd(); - - // compute mid - auto mid = NodeRef(begin.id + (end.id - begin.id) / 2); - - // get range for this node - if (ref.isLeftChild()) { - range = NodeRange(begin,mid); - } else { - range = NodeRange(mid,end); - } - - // update the range - res.template setNodeRange(ref,range); - - }); - - }); - - // set up closures for edges - data.forAllEdgeKinds([&](const auto& edgeKind, const auto& level) { - - // get edge kind and level - using EdgeKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness 
- using lvl = get_level; - - // the closure is everything for now - MeshRegion closure = SubMeshRef::root(); - - // initialize all the closured with the full region - res.visitPreOrder([&](const SubTreeRef& ref) { - // fix forward closure - res.template setForwardClosure(ref,closure); - - // fix backward closure - res.template setBackwardClosure(ref,closure); - }); - - }); - - - // set up closures for hierarchies - data.forAllHierarchyKinds([&](const auto& hierarchyKind, const auto& level) { - - // get hierarchy kind and level - using HierarchyKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness - using lvl = get_level; - - // make sure this is not called for level 0 - assert_gt(lvl::value,0) << "There should not be any hierarchies on level 0."; - - // the closure is everything for now - MeshRegion closure = SubMeshRef::root(); - - // initialize all the closured with the full region - res.visitPreOrder([&](const SubTreeRef& ref) { - - // fix parent closure - res.template setParentClosure(ref,closure); - - // fix child closure - res.template setChildClosure(ref,closure); - }); - - }); - - // close the data representation - res.close(); - - // done - return res; - } - - }; - - - template< - typename NodeKind, - typename ElementType, - unsigned Level, - typename PartitionTree - > - class MeshDataFragment { - public: - - using facade_type = MeshData; - using region_type = MeshRegion; - using shared_data_type = PartitionTree; - - private: - - using partition_tree_type = PartitionTree; - - const partition_tree_type& partitionTree; - - region_type coveredRegion; - - std::vector data; - - friend facade_type; - - public: - - MeshDataFragment() = delete; - - MeshDataFragment(const partition_tree_type& ptree, const region_type& region) - : partitionTree(ptree), coveredRegion(region) { - - // get upper boundary of covered node ranges - std::size_t max = 0; - region.scan([&](const SubTreeRef& cur){ - max = 
std::max(max,ptree.template getNodeRange(cur).getEnd().id); - }); - - // resize data storage - data.resize(max); - - } - - private: - - MeshDataFragment(const partition_tree_type& ptree, std::vector&& data) - : partitionTree(ptree), coveredRegion(SubMeshRef::root()), data(std::move(data)) {} - - public: - - MeshDataFragment(const MeshDataFragment&) = delete; - MeshDataFragment(MeshDataFragment&&) = default; - - MeshDataFragment& operator=(const MeshDataFragment&) = delete; - MeshDataFragment& operator=(MeshDataFragment&&) = default; - - - facade_type mask() { - return facade_type(*this); - } - - const region_type& getCoveredRegion() const { - return coveredRegion; - } - - const ElementType& operator[](const NodeRef& id) const { - return data[id.getOrdinal()]; - } - - ElementType& operator[](const NodeRef& id) { - return data[id.getOrdinal()]; - } - - std::size_t size() const { - return data.size(); - } - - void resize(const region_type&) { - - } - - void insert(const MeshDataFragment& other, const region_type& area) { - assert_true(core::isSubRegion(area,other.coveredRegion)) << "New data " << area << " not covered by source of size " << coveredRegion << "\n"; - assert_true(core::isSubRegion(area,coveredRegion)) << "New data " << area << " not covered by target of size " << coveredRegion << "\n"; - - assert_not_implemented(); - std::cout << core::isSubRegion(area,other.coveredRegion); - -// // copy data line by line using memcpy -// area.scanByLines([&](const point& a, const point& b){ -// auto start = flatten(a); -// auto length = (flatten(b) - start) * sizeof(T); -// std::memcpy(&data[start],&other.data[start],length); -// }); - } - - void extract(utils::ArchiveWriter&, const region_type&) const { - assert_not_implemented(); - } - - void insert(utils::ArchiveReader&) { - assert_not_implemented(); - } - - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // check that the element type is a trivial type - 
assert_true(std::is_trivial::value) - << "Sorry, only trivial types may be stored through this infrastructure."; - - // this fragment is required to cover the entire mesh - assert_eq(coveredRegion, SubMeshRef::root()); - - // write covered data to output stream - utils::write(out,data.size()); - utils::write(out,data.begin(),data.end()); - } - - static MeshDataFragment load(const partition_tree_type& ptree, std::istream& in) { - // restore the data buffer - std::size_t size = utils::read(in); - std::vector data(size); - utils::read(in,data.begin(),data.end()); - - // create the data fragment - return MeshDataFragment(ptree,std::move(data)); - } - - static MeshDataFragment interpret(const partition_tree_type& ptree, utils::RawBuffer& raw) { - - // TODO: when exchanging the vector by some manageable structure, replace this - // For now: we copy the data - - // copy the data buffer - std::size_t size = raw.consume(); - auto start = raw.consumeArray(size); - std::vector data(start, start + size); - - // create the data fragment - return MeshDataFragment(ptree,std::move(data)); - } - - }; - - - /** - * An entity to reference the full range of a scan. This token - * can not be copied and will wait for the completion of the scan upon destruction. 
- */ - class scan_reference { - - core::treeture handle; - - public: - - scan_reference(core::treeture&& handle) - : handle(std::move(handle)) {} - - scan_reference() {}; - scan_reference(const scan_reference&) = delete; - scan_reference(scan_reference&&) = default; - - scan_reference& operator=(const scan_reference&) = delete; - scan_reference& operator=(scan_reference&&) = default; - - ~scan_reference() { handle.wait(); } - - void wait() const { handle.wait(); } - - }; - - } // end namespace detail - - template< - typename NodeKind, - typename ElementType, - unsigned Level, - typename PartitionTree - > - class MeshData : public core::data_item> { - - template - friend class Mesh; - - public: - - using node_kind = NodeKind; - - using element_type = ElementType; - - using fragment_type = detail::MeshDataFragment; - - private: - - std::unique_ptr owned; - - fragment_type* data; - - - friend fragment_type; - - MeshData(fragment_type& data) : data(&data) {} - - MeshData(std::unique_ptr&& data) : owned(std::move(data)), data(owned.get()) {} - - MeshData(const PartitionTree& ptree, const detail::MeshRegion& region) - : owned(std::make_unique(ptree,region)), data(owned.get()) {} - - public: - - const ElementType& operator[](const NodeRef& id) const { - return (*data)[id]; - } - - ElementType& operator[](const NodeRef& id) { - return (*data)[id]; - } - - std::size_t size() const { - return (*data).size(); - } - - - void store(std::ostream& out) const { - // ensure that the data is owned - assert_true(owned) << "Only supported when data is owned (not managed by some Data Item Manager)"; - owned->store(out); - } - - static MeshData load(const PartitionTree& ptree, std::istream& in) { - return std::make_unique(fragment_type::load(ptree,in)); - } - - static MeshData interpret(const PartitionTree& ptree, utils::RawBuffer& raw) { - return std::make_unique(fragment_type::interpret(ptree,raw)); - } - }; - - - /** - * The default implementation of a mesh is capturing all 
ill-formed parameterizations - * of the mesh type to provide cleaner compiler errors. - */ - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class Mesh { - - static_assert(detail::is_nodes::value, - "First template argument of Mesh must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of Mesh must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of Mesh must be of type hierarchies<...>"); - - }; - - - /** - * The type for representing the topological information of a hierarchical mesh. - */ - template< - typename ... NodeKinds, - typename ... EdgeKinds, - typename ... Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class Mesh,edges,hierarchies,Levels,PartitionDepth> { - - static_assert(Levels > 0, "There must be at least one level!"); - - public: - - using topology_type = detail::MeshTopologyData,edges,hierarchies,Levels>; - - using partition_tree_type = detail::PartitionTree,edges,hierarchies,Levels,PartitionDepth>; - - template - using mesh_data_type = MeshData; - - using builder_type = MeshBuilder,edges,hierarchies,Levels>; - - friend builder_type; - - enum { levels = Levels }; - - private: - - partition_tree_type partitionTree; - - topology_type data; - - Mesh(topology_type&& data, partition_tree_type&& partitionTree) - : partitionTree(std::move(partitionTree)), data(std::move(data)) { - assert_true(data.isClosed()); - } - - public: - - // -- ctors / dtors / assignments -- - - Mesh(const Mesh&) = delete; - Mesh(Mesh&&) = default; - - Mesh& operator=(const Mesh&) = delete; - Mesh& operator=(Mesh&&) = default; - - - // -- provide access to components -- - - const topology_type& getTopologyData() const { - return data; - } - - const partition_tree_type& getPartitionTree() const { - return partitionTree; - } - - // -- mesh querying -- - - template - std::size_t getNumNodes() const 
{ - return data.template getNumNodes(); - } - - // -- mesh interactions -- - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - NodeList getSinks(const NodeRef& a) const { - return data.edgeSets.template getSinks(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - NodeRef getSink(const NodeRef& a) const { - const auto& list = getSinks(a); - assert_eq(list.size(),1); - return list.front(); - } - - template< - typename EdgeKind, - typename B, - unsigned Level, - typename A = typename EdgeKind::src_node_kind - > - NodeList getSources(const NodeRef& b) const { - return data.edgeSets.template getSources(b); - } - - template< - typename EdgeKind, - typename B, - unsigned Level, - typename A = typename EdgeKind::src_node_kind - > - NodeRef getSource(const NodeRef& b) const { - const auto& list = getSources(b); - assert_eq(list.size(),1); - return list.front(); - } - - // -- overloading of getNeighbor convenience functions (aliases of getSink / getSource ) -- - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - std::enable_if_t::value,NodeRef> - getNeighbor(const NodeRef& a) const { - return getSink(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - std::enable_if_t::value,NodeList> - getNeighbors(const NodeRef& a) const { - return getSinks(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::src_node_kind - > - std::enable_if_t::value,NodeRef> - getNeighbor(const NodeRef& a) const { - return getSource(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::src_node_kind - > - std::enable_if_t::value,NodeList> - getNeighbors(const NodeRef& a) const { - return getSources(a); - } - 
- // -- parent / children relation -- - - template< - typename Hierarchy, - typename A, unsigned Level, - typename B = typename Hierarchy::parent_node_kind - > - NodeRef getParent(const NodeRef& a) const { - return data.hierarchySets.template getParent(a); - } - - template< - typename Hierarchy, - typename A, unsigned Level, - typename B = typename Hierarchy::child_node_kind - > - NodeList getChildren(const NodeRef& a) const { - return data.hierarchySets.template getChildren(a); - } - - /** - * A sequential operation calling the given body for each node of the given kind - * on the given level in parallel. - * - * NOTE: this operation is processed sequentially, and can thus not be distributed - * among multiple nodes. Use pforAll instead - * - * @tparam Kind the kind of node to be visited - * @tparam Level the level of the mesh to be addressed - * @tparam Body the type of operation to be applied on each node - * - * @param body the operation to be applied on each node of the selected kind and level - * @return a scan reference for synchronizing upon the asynchronously processed operation - */ - template - void forAll(const Body& body) const { - // iterate over all selected elements - for(const auto& cur : partitionTree.template getNodeRange(detail::SubTreeRef::root())) { - body(cur); - } - } - - /** - * A parallel operation calling the given body for each node of the given kind - * on the given level in parallel. - * - * This is the main operator for iterating over nodes within a mesh. All visits - * will always be conducted in parallel. 
- * - * @tparam Kind the kind of node to be visited - * @tparam Level the level of the mesh to be addressed - * @tparam Body the type of operation to be applied on each node - * - * @param body the operation to be applied on each node of the selected kind and level - * @return a scan reference for synchronizing upon the asynchronously processed operation - */ - template - detail::scan_reference pforAll(const Body& body) const { - - using range = detail::SubTreeRef; - - return core::prec( - // -- base case test -- - [](const range& a){ - // when we reached a leaf, we are at the bottom - return a.getDepth() == PartitionDepth; - }, - // -- base case -- - [&](const range& a){ - // apply the body to the elements of the current range - for(const auto& cur : partitionTree.template getNodeRange(a)) { - body(cur); - } - }, - // -- step case -- - core::pick( - // -- split -- - [](const range& a, const auto& rec){ - return core::parallel( - rec(a.getLeftChild()), - rec(a.getRightChild()) - ); - }, - // -- serialized step case (optimization) -- - [&](const range& a, const auto&){ - // apply the body to the elements of the current range - for(const auto& cur : partitionTree.template getNodeRange(a)) { - body(cur); - } - } - ) - )(detail::SubTreeRef::root()); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init, - const ReduceLocalState& exit) const { - typedef typename utils::lambda_traits::result_type res_type; - - using range = detail::SubTreeRef; - - auto handle = [](const InitLocalState& init, const MapOp& map, const ReduceLocalState& exit, const range& a, - const partition_tree_type& partitionTree)->res_type { - auto res = init(); - auto mapB = [map,&res](const auto& cur) { - return map(cur,res); - }; - for(const auto& cur : partitionTree.template getNodeRange(a)) { - mapB(cur); - } - return exit(res); - }; - - - // implements a binary splitting policy for iterating over the given 
iterator range - return core::prec( - [](const range& a) { - return a.getDepth() == PartitionDepth; - }, - [&](const range& a)->res_type { - return handle(init, map, exit, a, partitionTree); - }, - core::pick( - [reduce](const range& a, const auto& nested) { - // here we have the splitting - auto left = a.getLeftChild(); - auto right = a.getRightChild(); - -// return user::add(nested(left), nested(right)); - return core::combine(std::move(nested(left)),std::move(nested(right)),reduce); - }, - [&](const range& a, const auto&)->res_type { - return handle(init, map, exit, a, partitionTree); - } - ) - )(detail::SubTreeRef::root()).get(); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init) const { - return preduce(map, reduce, init, [](typename utils::lambda_traits::result_type a) { return a; }); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce) const { - typedef typename utils::lambda_traits::result_type res_type; - - return preduce(map, reduce, [](){ return res_type(); }, [](res_type a) { return a; }); - } - - // -- mesh data -- - - template - MeshData createNodeData() const { - return MeshData(partitionTree,detail::SubMeshRef::root()); - } - - template - std::array, N> createNodeDataArray() const { - return utils::build_array([&] { return MeshData(partitionTree,detail::SubMeshRef::root()); } ); - } - - template - MeshData loadNodeData(std::istream& in) const { - return MeshData::load(partitionTree,in); - } - - template - MeshData interpretNodeData(utils::RawBuffer& raw) const { - return MeshData::interpret(partitionTree,raw); - } - - - // -- mesh property handling -- - - template - MeshProperties createProperties() const { - return MeshProperties(*this); - } - - template - MeshProperties loadProperties(std::istream& in) const { - return MeshProperties::load(*this,in); - } - - template - MeshProperties 
interpretProperties(utils::RawBuffer& raw) const { - return MeshProperties::interpret(*this,raw); - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // write partition tree - partitionTree.store(out); - - // write topological data - data.store(out); - - } - - static Mesh load(std::istream& in) { - - // interpret the partition tree - auto partitionTree = partition_tree_type::load(in); - - // load topological data - auto topologyData = topology_type::load(in); - - // create result - return Mesh( - std::move(topologyData), - std::move(partitionTree) - ); - - } - - static Mesh interpret(utils::RawBuffer& raw) { - - // interpret the partition tree - auto partitionTree = partition_tree_type::interpret(raw); - - // load topological data - auto topologyData = topology_type::interpret(raw); - - // create result - return Mesh( - std::move(topologyData), - std::move(partitionTree) - ); - - } - - }; - - - - /** - * The default implementation of a mesh build is capturing all ill-formed parameterizations - * of the mesh builder type to provide cleaner compiler errors. - */ - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned layers - > - class MeshBuilder { - - static_assert(detail::is_nodes::value, - "First template argument of MeshBuilder must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of MeshBuilder must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of MeshBuilder must be of type hierarchies<...>"); - - }; - - /** - * A utility to construct meshes. - */ - template< - typename ... NodeKinds, - typename ... EdgeKinds, - typename ... 
Hierarchies, - unsigned Levels - > - class MeshBuilder,edges,hierarchies,Levels> { - - static_assert(Levels > 0, "There must be at least one level!"); - - public: - - template - using mesh_type = Mesh,edges,hierarchies,Levels,PartitionDepth>; - - using topology_type = detail::MeshTopologyData,edges,hierarchies,Levels>; - - private: - - topology_type data; - - public: - - // -- mesh modeling -- - - template - NodeRef create() { - // TODO: check that Kind is a valid node kind - static_assert(Level < Levels, "Trying to create a node on invalid level."); - return data.nodeSets.template create(); - } - - template - NodeRange create(unsigned num) { - // TODO: check that Kind is a valid node kind - static_assert(Level < Levels, "Trying to create a node on invalid level."); - return data.nodeSets.template create(num); - } - - template - void link(const NodeRef& a, const NodeRef& b) { - // TODO: check that EdgeKind is a valid edge kind - static_assert(Level < Levels, "Trying to create an edge on invalid level."); - static_assert(std::is_same::value, "Invalid source node type"); - static_assert(std::is_same::value, "Invalid target node type"); - return data.edgeSets.template addEdge(a,b); - } - - template - void link(const NodeRef& parent, const NodeRef& child) { - // TODO: check that HierarchyKind is a valid hierarchy kind - static_assert(LevelA == LevelB+1, "Can not connect nodes of non-adjacent levels in hierarchies"); - static_assert(LevelA < Levels, "Trying to create a hierarchical edge to an invalid level."); - static_assert(std::is_same::value, "Invalid source node type"); - static_assert(std::is_same::value, "Invalid target node type"); - return data.hierarchySets.template addChild(parent,child); - } - - // -- build mesh -- - - template - mesh_type build(const Partitioner& partitioner) const & { - - // close the topological data - topology_type meshData = data; - meshData.close(); - - // partition the mesh - auto partitionTree = partitioner.template 
partition(meshData); - - return mesh_type(std::move(meshData), std::move(partitionTree)); - } - - template - mesh_type build() const & { - return build(detail::NaiveMeshPartitioner()); - } - - - template - mesh_type build(const Partitioner& partitioner) && { - - // partition the mesh - auto partitionTree = partitioner.template partition(data); - - return mesh_type(std::move(data), std::move(partitionTree)); - } - - template - mesh_type build() const && { - return std::move(*this).template build(detail::NaiveMeshPartitioner()); - } - - }; - - - // -- Mesh Property Collections -------------------------------------- - - - // TODO: reduce the template instantiations complexity of this code. - - namespace detail { - - template - class MeshPropertiesData { - - using property_list = utils::type_list; - - template - using mesh_data_type = MeshData; - - using data_t = std::tuple...>; - - data_t data; - - MeshPropertiesData(data_t&& data) : data(std::move(data)) {} - - public: - - template - MeshPropertiesData(const Mesh& mesh) - : data(mesh.template createNodeData()...) 
{} - - template - mesh_data_type& get() { - return std::get::value>(data); - } - - template - const mesh_data_type& get() const { - return std::get::value>(data); - } - - void store(std::ostream& out) const { - // write property data - utils::forEach(data,[&](const auto& entry){ - entry.store(out); - }); - } - - template - static MeshPropertiesData load(const Mesh& mesh, std::istream& in) { - // a temporary tuple type to be filled with temporary results - using tmp_data_type = std::tuple>...>; - - // load property data - tmp_data_type data; - utils::forEach(data,[&](auto& entry){ - // load data - using data_type = typename std::remove_reference_t::element_type; - using node_kind = typename data_type::node_kind; - using value_type = typename data_type::element_type; - entry = std::make_unique(mesh.template loadNodeData(in)); - }); - - // move data to tuple - return MeshPropertiesData(utils::map(data,[&](auto& entry){ - return std::move(*entry.get()); - })); - } - - template - static MeshPropertiesData interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // a temporary tuple type to be filled with temporary results - using tmp_data_type = std::tuple>...>; - - // load property data - tmp_data_type data; - utils::forEach(data,[&](auto& entry){ - // load data - using data_type = typename std::remove_reference_t::element_type; - using node_kind = typename data_type::node_kind; - using value_type = typename data_type::element_type; - entry = std::make_unique(mesh.template interpretNodeData(raw)); - }); - - // move data to tuple - return MeshPropertiesData(utils::map(data,[&](auto& entry){ - return std::move(*entry.get()); - })); - } - - }; - - template - class MeshPropertiesLevels { - - template - using level_data = MeshPropertiesData; - - using nested_level_type = MeshPropertiesLevels; - - level_data data; - - nested_level_type nested; - - MeshPropertiesLevels(level_data&& data, nested_level_type&& nested) - : data(std::move(data)), nested(std::move(nested)) {} - - 
public: - - template - MeshPropertiesLevels(const Mesh& mesh) - : data(mesh), nested(mesh) {} - - template - std::enable_if_t>& - get() { - return data; - } - - template - const std::enable_if_t>& - get() const { - return data; - } - - template - std::enable_if_t>& - get() { - return nested.template get(); - } - - template - const std::enable_if_t>& - get() const { - return nested.template get(); - } - - void store(std::ostream& out) const { - // write property data - data.store(out); - // write nested data - nested.store(out); - } - - - template - static MeshPropertiesLevels load(const Mesh& mesh, std::istream& in) { - // load property data - auto data = level_data::load(mesh,in); - // load nested data - auto nested = nested_level_type::load(mesh,in); - // build level data - return MeshPropertiesLevels(std::move(data),std::move(nested)); - } - - template - static MeshPropertiesLevels interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // interpret property data - auto data = level_data::interpret(mesh,raw); - // interpret nested data - auto nested = nested_level_type::interpret(mesh,raw); - // build level data - return MeshPropertiesLevels(std::move(data),std::move(nested)); - } - - }; - - - template - class MeshPropertiesLevels { - - using level_data = MeshPropertiesData; - - level_data data; - - MeshPropertiesLevels(level_data&& data) : data(std::move(data)) {} - - public: - - template - MeshPropertiesLevels(const Mesh& mesh) - : data(mesh) {} - - template - std::enable_if_t& - get() { - return data; - } - - template - const std::enable_if_t& - get() const { - return data; - } - - void store(std::ostream& out) const { - // write property data - data.store(out); - } - - template - static MeshPropertiesLevels load(const Mesh& mesh, std::istream& in) { - // load property data - return level_data::load(mesh,in); - } - - template - static MeshPropertiesLevels interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // interpret property data - return 
level_data::interpret(mesh,raw); - } - - }; - - } - - template - class MeshProperties { - - template - friend class Mesh; - - using DataStore = detail::MeshPropertiesLevels; - - DataStore data; - - template - MeshProperties(const Mesh& mesh) : data(mesh) {} - - MeshProperties(DataStore&& data) : data(std::move(data)) {} - - public: - - template - MeshData& - get() { - return data.template get().template get(); - } - - template - const MeshData& - get() const { - return data.template get().template get(); - } - - template - typename Property::value_type& get(const NodeRef& node) { - return get()[node]; - } - - template - const typename Property::value_type& get(const NodeRef& node) const { - return get()[node]; - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - // write property data - data.store(out); - } - - template - static MeshProperties load(const Mesh& mesh, std::istream& in) { - // forward call to data store - return MeshProperties(DataStore::load(mesh,in)); - } - - template - static MeshProperties interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // forward call to data store - return MeshProperties(DataStore::interpret(mesh,raw)); - } - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/scalar.h b/vendor/allscale/api/user/data/scalar.h deleted file mode 100644 index a8bcee806..000000000 --- a/vendor/allscale/api/user/data/scalar.h +++ /dev/null @@ -1,216 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/api/core/data.h" - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/join.h" -#include "allscale/utils/large_array.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - // --------------------------------------------------------- - // Declarations - // --------------------------------------------------------- - - - /** - 
* A data item wrapper for scalar values. - */ - template - class Scalar; - - - // --------------------------------------------------------- - // Definitions - // --------------------------------------------------------- - - - namespace detail { - - /** - * The type utilized to address regions of scalar data items. The region - * defines the unit region of either being present or not. - */ - class ScalarRegion { - - // indicating whether the value is present or not - bool flag; - - public: - - ScalarRegion() = default; - - ScalarRegion(bool value) : flag(value) {} - - bool operator==(const ScalarRegion& other) const { - return flag == other.flag; - } - - bool operator!=(const ScalarRegion& other) const { - return flag != other.flag; - } - - /** - * The empty check returns true if the value is not present. - */ - bool empty() const { - return !flag; - } - - static ScalarRegion merge(const ScalarRegion& a, const ScalarRegion& b) { - return { a.flag || b.flag }; - } - - static ScalarRegion intersect(const ScalarRegion& a, const ScalarRegion& b) { - return { a.flag && b.flag }; - } - - static ScalarRegion difference(const ScalarRegion& a, const ScalarRegion& b) { - return a.flag && !b.flag; - } - - static ScalarRegion span(const ScalarRegion& a, const ScalarRegion& b) { - return merge(a,b); - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static ScalarRegion load(utils::ArchiveReader& reader) { - return reader.read(); - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - writer.write(flag); - } - - friend std::ostream& operator<<(std::ostream& out, const ScalarRegion& region) { - return out << (region.flag ? "+" : "-"); - } - - }; - - - /** - * A scalar data item fragment provides the capability of maintaining a copy of - * the covered scalar value. 
- */ - template - class ScalarFragment { - - // the stored value - T value; - - // the region covered -- thus, indicating whether the value is present or not - ScalarRegion covered; - - friend class Scalar; - - public: - - using region_type = ScalarRegion; - using shared_data_type = core::no_shared_data; - using facade_type = Scalar; - - ScalarFragment(const core::no_shared_data&, const ScalarRegion& region = ScalarRegion()) - : covered(region) {} - - const ScalarRegion& getCoveredRegion() const { - return covered; - } - - void resize(const ScalarRegion& newSize) { - covered = newSize; - } - - void insert(const ScalarFragment& f, const ScalarRegion& region) { - assert_false(covered.empty()); - if (region.empty()) return; - value = f.value; - } - - void extract(utils::ArchiveWriter& writer, const ScalarRegion& region) const { - // make sure the requested region is covered by this fragment - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "The requested region is not covered by this fragment."; - - // start by adding the extracted region - writer.write(region); - - // if the requested region is empty, we are done - if (region.empty()) return; - - // otherwise we extract the data stored in this fragment - writer.write(value); - } - - void insert(utils::ArchiveReader& reader) { - - // start by reading the encoded region - auto region = reader.read(); - - // make sure the inserted region is covered by this fragment (size is not changing) - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "The region to be imported is not covered by this fragment!"; - - // if the imported data is empty, we are done - if (region.empty()) return; - - // otherwise we load the data from the archive - value = reader.read(); - } - - Scalar mask() { - return Scalar(*this); - } - - }; - - } - - - template - class Scalar : public core::data_item> { - - friend class detail::ScalarFragment; - - std::unique_ptr> owned; - - detail::ScalarFragment* base; - - 
Scalar(detail::ScalarFragment& fragment) - : base(&fragment) {} - - public: - - Scalar() - : owned(std::make_unique>(core::no_shared_data())), base(owned.get()) {} - - T& get() { - return data_item_element_access(*this, detail::ScalarRegion(true), base->value); - } - - const T& get() const { - return data_item_element_access(*this, detail::ScalarRegion(true), base->value); - } - - void set(const T& newValue) { - data_item_element_access(*this, detail::ScalarRegion(true), base->value) = newValue; - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/static_grid.h b/vendor/allscale/api/user/data/static_grid.h deleted file mode 100644 index 1af2d9e42..000000000 --- a/vendor/allscale/api/user/data/static_grid.h +++ /dev/null @@ -1,342 +0,0 @@ -#pragma once - -#include "allscale/api/user/data/grid.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - using coordinate_type = std::int64_t; - - template - using StaticGridPoint = GridPoint; - - template - using StaticGridBox = GridBox; - - template - using StaticGridRegion = GridRegion; - - template - class StaticGridFragment; - - template - class StaticGrid; - - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - - template - class StaticGridFragment { - public: - - enum { Dims = sizeof...(Sizes) }; - - using shared_data_type = core::no_shared_data; - using facade_type = StaticGrid; - using region_type = StaticGridRegion; - - private: - - using point = StaticGridPoint; - using box = StaticGridBox; - - region_type size; - - utils::LargeArray data; - - 
public: - - StaticGridFragment(const region_type& size = region_type()) - : StaticGridFragment(core::no_shared_data(), size) {} - - StaticGridFragment(const core::no_shared_data&, const region_type& size = region_type()) : size(size), data(area(totalSize())) { - // allocate covered data space - size.scanByLines([&](const point& a, const point& b) { - data.allocate(flatten(a),flatten(b)); - }); - } - - bool operator==(const StaticGridFragment& other) const { - return data == other.data; - } - - T& operator[](const point& pos) { - return data[flatten(pos)]; - } - - const T& operator[](const point& pos) const { - return data[flatten(pos)]; - } - - StaticGrid mask() { - return StaticGrid(*this); - } - - const region_type& getCoveredRegion() const { - return size; - } - - point totalSize() const { - return point({ Sizes... }); - } - - void resize(const region_type& newSize) { - - // get the difference - region_type plus = region_type::difference(newSize,size); - region_type minus = region_type::difference(size,newSize); - - // update the size - size = newSize; - - // allocated new data - plus.scanByLines([&](const point& a, const point& b){ - data.allocate(flatten(a),flatten(b)); - }); - - // free excessive memory - minus.scanByLines([&](const point& a, const point& b){ - data.free(flatten(a),flatten(b)); - }); - } - - void insert(const StaticGridFragment& other, const region_type& area) { - assert_true(core::isSubRegion(area,other.size)) << "New data " << area << " not covered by source of size " << size << "\n"; - assert_true(core::isSubRegion(area,size)) << "New data " << area << " not covered by target of size " << size << "\n"; - - // copy data line by line using memcpy - area.scanByLines([&](const point& a, const point& b){ - auto start = flatten(a); - auto length = (flatten(b) - start) * sizeof(T); - std::memcpy(&data[start],&other.data[start],length); - }); - } - - void extract(utils::ArchiveWriter& writer, const region_type& region) const { - - // make sure the 
region is covered - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "This fragment does not contain all of the requested data!"; - - // write the requested region to the archive - writer.write(region); - - // add the data - region.scan([&](const point& p){ - writer.write((*this)[p]); - }); - } - - void insert(utils::ArchiveReader& reader) { - - // extract the covered region contained in the archive - auto region = reader.read(); - - // check that it is fitting - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "Targeted fragment does not cover data to be inserted!"; - - // insert the data - region.scan([&](const point& p){ - (*this)[p] = reader.read(); - }); - } - - private: - - static std::size_t area(const StaticGridPoint& pos) { - std::size_t res = 1; - for(std::size_t i=0; i& pos) const { - - static const std::array totalSize{ { Sizes ... } }; - - coordinate_type res = 0; - coordinate_type size = 1; - - for(int i=Dims-1; i>=0; i--) { - res += pos[i] * size; - size *= totalSize[i]; - } - - return res; - } - - }; - - template - class StaticGrid : public core::data_item> { - - /** - * A pointer to an underlying fragment owned if used in an unmanaged state. - */ - std::unique_ptr> owned; - - /** - * A reference to the fragment instance operating on, referencing the owned fragment or an externally managed one. - */ - StaticGridFragment* base; - - /** - * Enables fragments to use the private constructor below. - */ - friend class StaticGridFragment; - - /** - * The constructor to be utilized by the fragment to create a facade for an existing fragment. - */ - StaticGrid(StaticGridFragment& base) : base(&base) {} - - public: - - /** - * The number of dimensions. - */ - enum { dimensions = sizeof...(Sizes) }; - - /** - * The type of coordinate utilized by this type. - */ - using coordinate_type = StaticGridPoint; - - /** - * The type of region utilized by this type. 
- */ - using region_type = StaticGridRegion; - - /** - * Creates a new map covering the given region. - */ - StaticGrid() - : owned(std::make_unique>(region_type(0,size()))), base(owned.get()) {} - - /** - * A constructor for static grids accepting a size parameter, to be compatible to the dynamic sized grid. - */ - StaticGrid(const StaticGridPoint& size) - : owned(std::make_unique>(region_type(0,size))), base(owned.get()) { - assert_eq(size,this->size()) << "Initialization of invalid sized static grid."; - } - - /** - * Disable copy construction. - */ - StaticGrid(const StaticGrid&) = delete; - - /** - * Enable move construction. - */ - StaticGrid(StaticGrid&&) = default; - - /** - * Disable copy-assignments. - */ - StaticGrid& operator=(const StaticGrid&) = delete; - - /** - * Enable move assignments. - */ - StaticGrid& operator=(StaticGrid&&) = default; - - /** - * Obtains the full size of this grid. - */ - coordinate_type size() const { - return coordinate_type({ Sizes ... }); - } - - /** - * Compare the full content of the grid. - */ - bool operator==(const StaticGrid& other) const { - return *base == *other.base; - } - - /** - * Provides read/write access to one of the values stored within this grid. - */ - T& operator[](const coordinate_type& index) { - allscale_check_bounds(index, (*this)); - return data_item_element_access(*this, region_type::single(index), (*base)[index]); - } - - /** - * Provides read access to one of the values stored within this grid. - */ - const T& operator[](const coordinate_type& index) const { - allscale_check_bounds(index, (*this)); - return data_item_element_access(*this, region_type::single(index), (*base)[index]); - } - - /** - * A sequential scan over all elements within this grid, providing - * read-only access. 
- */ - template - void forEach(const Op& op) const { - allscale::api::user::algorithm::detail::forEach( - coordinate_type(0), - size(), - [&](const auto& pos){ - op((*this)[pos]); - } - ); - } - - /** - * A sequential scan over all elements within this grid, providing - * read/write access. - */ - template - void forEach(const Op& op) { - allscale::api::user::algorithm::detail::forEach( - coordinate_type(0), - size(), - [&](const auto& pos){ - op((*this)[pos]); - } - ); - } - - /** - * A sequential scan over all elements within this grid, providing - * read-only access. - */ - template - auto pforEach(const Op& op) const { - return algorithm::pfor(coordinate_type(0), size(), [&](const auto& pos) { op((*this)[pos]); }); - } - - /** - * A parallel scan over all elements within this grid, providing - * read/write access. - */ - template - auto pforEach(const Op& op) { - return algorithm::pfor(coordinate_type(0), size(), [&](const auto& pos) { op((*this)[pos]); }); - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/save_to_binary.h b/vendor/allscale/api/user/save_to_binary.h deleted file mode 100644 index 2f6de85f8..000000000 --- a/vendor/allscale/api/user/save_to_binary.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once - -#include "allscale/api/core/io.h" -#include "allscale/api/user/algorithm/pfor.h" - - -namespace allscale { -namespace api { -namespace user { - -// Save vector of vectors to binary in parallel -template -void saveVecVecToFile(std::vector> vecVec, std::string filename, size_t innerSize) { - core::FileIOManager& manager = core::FileIOManager::getInstance(); - size_t outerSize = vecVec.size(); - - // generate output data - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - auto fout = manager.openOutputStream(binary); - -// fout.write(innerSize); - - std::vector idxVec; - for(size_t i = 0; i < innerSize; ++i) - idxVec.push_back(i); 
- - algorithm::pfor(idxVec, [&](size_t& i) { - fout.atomic([&](auto& out) { - // write preamble - out.write(i); - - // write data - for(size_t j = 0; j < outerSize; ++j) { - out.write(vecVec[j][i]); - } - }); - }); - - manager.close(fout); - -} - -template -void saveVecVecToFileMM(std::vector> vecVec, std::string filename, unsigned outerSize, unsigned innerSize) { - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - // generate output data - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - core::MemoryMappedOutput fout = manager.openMemoryMappedOutput(binary, sizeof(T)* outerSize*innerSize); - - std::vector idxVec; - for(size_t i = 0; i < innerSize; ++i) - idxVec.push_back(i); - - auto dataOut = &fout.access();//std::array>(); - algorithm::pfor(idxVec, [&](size_t& i) { - // write data - for(size_t j = 0; j < outerSize; ++j) { - dataOut[i*outerSize + j] = vecVec[j][i]; - } - }); - manager.close(fout); -} - -// Read vector of vectors to binary in parallel -template -std::vector> readVecVecFromFile(std::string filename, size_t outerSize, size_t innerSize) { - std::vector> vecVec; - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - auto fin = manager.openInputStream(binary); - - for(size_t j = 0; j < outerSize; ++j) { - vecVec.push_back(std::vector()); - for(size_t i = 0; i < innerSize; ++i) - vecVec[j].push_back(T()); - } - - for(size_t i = 0; i < innerSize; ++i) { - // read position from file - size_t idx = fin.read(); - - for(size_t j = 0; j < outerSize; ++j) { - // read data - vecVec[j][idx] = (fin.read()); - } - } - - manager.close(fin); - return vecVec; -} - - -// Read vector of vectors to binary in parallel -template -std::vector> readVecVecFromFileMM(std::string filename, unsigned outerSize, unsigned innerSize) { - std::vector> vecVec; - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - core::Entry 
binary = manager.createEntry(filename, core::Mode::Binary); - auto fin = manager.openMemoryMappedInput(binary); - auto dataIn = &fin.access();//>(); - - for(size_t j = 0; j < outerSize; ++j) { - vecVec.push_back(std::vector()); - for(size_t i = 0; i < innerSize; ++i) - vecVec[j].push_back(T()); - } - - for(size_t i = 0; i < innerSize; ++i) { - for(size_t j = 0; j < outerSize; ++j) { - // read data - vecVec[j][i] = dataIn[i*outerSize + j]; - } - } - - manager.close(fin); - return vecVec; -} - -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/utils/array_utils.h b/vendor/allscale/utils/array_utils.h deleted file mode 100644 index 5de03f7dc..000000000 --- a/vendor/allscale/utils/array_utils.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -namespace allscale { -namespace utils { - -namespace { - template - struct array_builder { - template - std::array operator()(Fn&& fn, T&&... vals) const { - return array_builder{}(std::forward(fn), std::forward(vals)..., fn()); - } - }; - - template - struct array_builder { - template - std::array operator()(Fn&&, T&&... vals) const { - return { { std::forward(vals)... } }; - } - }; -} - -/* - * Create an Array of N elements, initialized with the elements returned by fn. Can be used to create an array of elements without default constructor - * - */ -template::type> -std::array build_array(Fn&& fn) { - return array_builder<0, N, U>()(std::forward(fn)); -} - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/assert.h b/vendor/allscale/utils/assert.h deleted file mode 100644 index 8e2ca7cc9..000000000 --- a/vendor/allscale/utils/assert.h +++ /dev/null @@ -1,132 +0,0 @@ -#pragma once - -/** - * This header file defines a set of macros to define more readable and flexible assertions within - * program code. Also, macros supporting the declaration of variables only required for checking - * assertions are supported. 
As all assertions, in case the macro NDEBUG is defined, they will be - * ignored. In those cases, variables declared using the 'assert_decl' macro will not be declared. - */ - -#include - -#define __allscale_xstr_(a) __allscale_str_(a) -#define __allscale_str_(a) #a - -#include "allscale/utils/unused.h" - -#if defined(NDEBUG) - -#define _assert_ignore \ - if(false) std::cerr << "" - -#define assert_decl(_DECL) ((void)0) -#define assert_true(_COND) _assert_ignore -#define assert_eq(_a, _b) _assert_ignore -#define assert_ne(_a, _b) _assert_ignore -#define assert_lt(_a, _b) _assert_ignore -#define assert_le(_a, _b) _assert_ignore -#define assert_gt(_a, _b) _assert_ignore -#define assert_ge(_a, _b) _assert_ignore -#define assert_fail() _assert_ignore -#define assert_pred1(_a, _b) _assert_ignore -#define assert_not_pred1(_a, _b) _assert_ignore -#define assert_pred2(_a, _b, _c) _assert_ignore -#define assert_not_pred2(_a, _b, _c) _assert_ignore - -#else -#include - - -namespace insieme { -namespace utils { - namespace detail { - - struct LazyAssertion { - bool value; - LazyAssertion(bool value) : value(value) {} - ~LazyAssertion() { - if(!value) { - std::cerr << "\n"; - abort(); - } - } - operator bool() const { - return !value; - } - }; - - } // end namespace detail -} // end namespace utils -} // end namespace insieme - -#define assert_decl(_DECL) _DECL - -#define assert_true(_COND) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)(_COND))) \ - std::cerr << "\nAssertion " #_COND " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_eq(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) == (_B))) \ - std::cerr << "\nAssertion " #_A " == " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_ne(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = 
insieme::utils::detail::LazyAssertion((_A) != (_B))) \ - std::cerr << "\nAssertion " #_A " != " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_lt(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) < (_B))) \ - std::cerr << "\nAssertion " #_A " < " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_le(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) <= (_B))) \ - std::cerr << "\nAssertion " #_A " <= " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_gt(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) > (_B))) \ - std::cerr << "\nAssertion " #_A " > " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_ge(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) >= (_B))) \ - std::cerr << "\nAssertion " #_A " >= " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_fail() \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion(false)) std::cerr << "\nAssertion failed in " __FILE__ ":" __allscale_xstr_(__LINE__) " - " - -#define assert_pred1(_P, _A) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)((_P)(_A)))) \ - std::cerr << "\nAssertion " #_P "(" #_A ") with " #_A " = " << (_A) << " in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_not_pred1(_P, _A) \ - if(__allscale_unused auto __allscale_temp_object_ = 
insieme::utils::detail::LazyAssertion(!(bool)((_P)(_A)))) \ - std::cerr << "\nAssertion !" #_P "(" #_A ") with " #_A " = " << (_A) << " in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_pred2(_P, _A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)((_P)(_A, _B)))) \ - std::cerr << "\nAssertion " #_P "(" #_A ", " #_B ") with\n " #_A " = " << (_A) << "\n " #_B " = " << (_B) \ - << "\n in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_not_pred2(_P, _A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion(!(bool)((_P)(_A, _B)))) \ - std::cerr << "\nAssertion !" #_P "(" #_A ", " #_B ") with\n " #_A " = " << (_A) << "\n " #_B " = " << (_B) \ - << "\n in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#endif - -// ------ derived definitions ------ - -#define assert_false(_COND) assert_true(!(_COND)) -#define assert_not_implemented() assert_fail() << "Not implemented functionality in " __FILE__ ":" __allscale_xstr_(__LINE__) "\n" - -// --------- bounds checks --------- - -#if defined(ALLSCALE_CHECK_BOUNDS) - -#define allscale_check_bounds(_INDEX, _CONTAINER) \ - assert_true((_INDEX) >= 0 && (_INDEX) < (_CONTAINER).size()) << "Index " << (_INDEX) << " out of bounds " << (_CONTAINER).size(); - -#else - -#define allscale_check_bounds(_INDEX, _CONTAINER) \ - if(false) std::cerr << "" - -#endif \ No newline at end of file diff --git a/vendor/allscale/utils/bag.h b/vendor/allscale/utils/bag.h deleted file mode 100644 index ed1a61f00..000000000 --- a/vendor/allscale/utils/bag.h +++ /dev/null @@ -1,117 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace allscale { -namespace utils { - - /** - * A data structure for maintaining a collection of - * objects with duplicates. 
- */ - template - class Bag { - - // the element type maintained in this bag - using element_type = T; - - // internally, the data is maintained in a simple list - std::vector data; - - public: - - /** - * Tests whether this bag is empty or not. - */ - bool empty() const { - return data.empty(); - } - - /** - * Determines the number of elements in this bag. - */ - std::size_t size() const { - return data.size(); - } - - /** - * Inserts a new element in this bag. - */ - void insert(const T& element) { - data.push_back(element); - } - - /** - * Removes an element from this bag. - */ - void remove(const T& element) { - auto pos = std::find(data.begin(),data.end(),element); - if (pos == data.end()) return; - data.erase(pos); - } - - /** - * Tests whether the given element is contained within this bag. - */ - bool contains(const T& element) { - auto pos = std::find(data.begin(),data.end(),element); - return pos != data.end(); - } - - // add support for scans - - /** - * Obtains an iterator pointing to the start of the range of - * elements contained in this bag. - */ - auto begin() const { - return data.begin(); - } - - /** - * Obtains an iterator pointing to the end of the range of - * elements contained in this bag. - */ - auto end() const { - return data.end(); - } - - /** - * Runs a combined update and filter operation on the elements - * in this bag. The elements are passed by reference to the given - * body -- which may return false if elements shell be removed, tue - * otherwise. - */ - template - void updateFilter(const Body& body) { - // remove all elements where the predicate is violated - auto newEnd = std::remove_if(data.begin(), data.end(), [&](T& i) { return !body(i); }); - data.erase(newEnd,data.end()); - } - - /** - * Removes all elements from this bag which do not satisfy the - * given predicates. 
- */ - template - void filter(const Predicate& pred) { - updateFilter([&](const T& i) { - return pred(i); - }); - } - - /** - * Adds printer support to this bag. - */ - friend std::ostream& operator<<(std::ostream& out, const Bag& bag) { - return out << "{" << utils::join(",",bag.data) << "}"; - } - - }; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/bitmanipulation.h b/vendor/allscale/utils/bitmanipulation.h deleted file mode 100644 index 1391ddbcc..000000000 --- a/vendor/allscale/utils/bitmanipulation.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#ifdef _MSC_VER - #include -#endif - -namespace allscale { -namespace utils { - - /** - * A wrapper function for counting leading zeros - */ - inline int countLeadingZeros(unsigned value) { - #ifdef _MSC_VER - unsigned long retVal = 0; - if(_BitScanReverse(&retVal, value)) - return 31-retVal; - // all zeros is undefined behavior, we simply return 32 - return 32; - #else - return __builtin_clz(value); - #endif - } - - /** - * A wrapper function for counting trailing zeros - */ - inline int countTrailingZeros(unsigned value) { - #ifdef _MSC_VER - unsigned long retVal = 0; - if(_BitScanForward(&retVal, value)) - return retVal; - // all zeros is undefined behavior, we simply return 32 - return 32; - #else - return __builtin_ctz(value); - #endif - } - - /** - * A wrapper function for counting 1-bits - */ - inline int countOnes(unsigned value) { - #ifdef _MSC_VER - return __popcnt(value); - #else - return __builtin_popcount(value); - #endif - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/concepts.h b/vendor/allscale/utils/concepts.h deleted file mode 100644 index fc5d6e72d..000000000 --- a/vendor/allscale/utils/concepts.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - template - struct is_equality_comparable : public std::false_type {}; - - template - struct 
is_equality_comparable() == std::declval()),bool>::value && - std::is_convertible() != std::declval()),bool>::value, - void>::type> : public std::true_type {}; - - - template - struct is_value : public std::false_type {}; - - template - struct is_value::value && - - // regions need to be default-constructible - std::is_copy_constructible::value && - - // regions need to be default-constructible - std::is_copy_assignable::value && - - // regions need to be destructible - std::is_destructible::value && - - // regions need to be equality comparable - utils::is_equality_comparable::value, - - void>::type> : public std::true_type {}; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/functional_utils.h b/vendor/allscale/utils/functional_utils.h deleted file mode 100644 index fcc17a271..000000000 --- a/vendor/allscale/utils/functional_utils.h +++ /dev/null @@ -1,143 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/type_list.h" - -namespace allscale { -namespace utils { - - - // -------------------- Function Traits for Lambdas ---------------------------- - - namespace detail { - - template struct lambda_traits_helper { }; - - // get rid of const modifier - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - // get rid of pointers - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - // handle class of member function pointers - template - struct lambda_traits_helper : public lambda_traits_helper { - typedef C class_type; - }; - - // get rid of const modifier - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - template - struct lambda_traits_helper - { - enum { arity = 0 }; - typedef R result_type; - typedef type_list<> argument_types; - }; - - template - struct lambda_traits_helper - { - enum { arity = 1 }; - typedef R result_type; - typedef T1 arg1_type; - typedef T1 argument_type; - typedef type_list argument_types; - }; - - 
template - struct lambda_traits_helper - { - enum { arity = 2 }; - typedef R result_type; - typedef T1 arg1_type; - typedef T2 arg2_type; - typedef T1 first_argument_type; - typedef T2 second_argument_type; - typedef type_list argument_types; - }; - - template - struct lambda_traits_helper { - enum { arity = 3 + sizeof...(A) }; - typedef R result_type; - typedef T1 arg1_type; - typedef T2 arg2_type; - typedef T3 arg3_type; - typedef type_list argument_types; - }; - - - template - struct call_operator_type { - using type = decltype(Lambda::operator()); - }; - - template - decltype(&Lambda::operator()) getCallOperator() { - return &Lambda::operator(); - } - - /* - psalz: MSVC2015 complains about multiple definitions here. - grid.h doesn't seem to need it => commented out. - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - */ - - } // end namespace detail - - - template - struct lambda_traits : public detail::lambda_traits_helper())> { }; - - template - struct lambda_traits : public detail::lambda_traits_helper { }; - - template - struct lambda_traits : public lambda_traits { }; - - template - struct lambda_traits : public lambda_traits { }; - - template - struct lambda_traits : public detail::lambda_traits_helper { }; - - template - struct lambda_traits : public lambda_traits { }; - - - - template - struct is_std_function : public std::false_type {}; - - template - struct is_std_function> : public std::true_type {}; - - template - struct is_std_function : public is_std_function {}; - - template - struct is_std_function : public is_std_function {}; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/io_utils.h 
b/vendor/allscale/utils/io_utils.h deleted file mode 100644 index 77f162bbb..000000000 --- a/vendor/allscale/utils/io_utils.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace allscale { -namespace utils { - - // -- some convenience utilities for stream based IO operations -- - - template - void write(std::ostream& out, T value) { - out.write((char*)&value, sizeof(T)); - } - - template - void write(std::ostream& out, const Iter& a, const Iter& b) { - for(auto it = a; it != b; ++it) { - out.write((char*)&(*it), sizeof(typename std::remove_reference::type)); - } - } - - template - T read(std::istream& in) { - T value = T(); - in.read((char*)&value, sizeof(T)); - return value; - } - - template - void read(std::istream& in, const Iter& a, const Iter& b) { - for(auto it = a; it != b; ++it) { - *it = read::type>(in); - } - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/large_array.h b/vendor/allscale/utils/large_array.h deleted file mode 100644 index 257a19573..000000000 --- a/vendor/allscale/utils/large_array.h +++ /dev/null @@ -1,609 +0,0 @@ -#pragma once - -#ifndef _MSC_VER - #include - #include -#else - #include - #include - -#endif - -#include - -#include -#include - -#include "allscale/utils/assert.h" - -#include "allscale/utils/printer/vectors.h" - -namespace allscale { -namespace utils { - - - namespace detail { - - /** - * Intervals are utilized by the LargeArray class to manage active intervals -- those intervals - * for which the stored values need to be preserved. - */ - class Intervals { - - /** - * A list of start/end values of the covered intervals. - * For instance, the values [10,15,18,35] correspond to the - * intervals [10,..,15) and [18,..,35). The intervals are sorted. - * The lower boundary is included, the upper boundary not. 
- */ - std::vector data; - - public: - - /** - * A factory function creating a list of intervals consisting of a single, - * closed range [begin,end). - */ - static Intervals fromRange(std::size_t begin, std::size_t end) { - Intervals res; - res.add(begin,end); - return res; - } - - /** - * Compares this and the given intervals for equality. - */ - bool operator==(const Intervals& other) const { - return data == other.data; - } - - /** - * Compares this and the given intervals for inequality. - */ - bool operator!=(const Intervals& other) const { - return data != other.data; - } - - /** - * Checks whether this is interval is empty. - */ - bool empty() const { - return data.empty(); - } - - /** - * Adds a new interval to the covered intervals. - * @param from the start (inclusive) of the interval to be added - * @param to the end (exclusive) of the interval to be added - */ - void add(std::size_t from, std::size_t to) { - - // skip empty ranges - if (from >= to) return; - - // insert first element - if (data.empty()) { - data.push_back(from); - data.push_back(to); - } - - // find positions for from and to - auto it_begin = data.begin(); - auto it_end = data.end(); - - auto it_from = std::upper_bound(it_begin, it_end, from); - auto it_to = std::upper_bound(it_begin, it_end, to-1); - - std::size_t idx_from = std::distance(it_begin,it_from); - std::size_t idx_to = std::distance(it_begin,it_to); - - // whether insertion is at a common place - if (it_from == it_to) { - - // if it is between ranges ... 
- if (idx_to % 2 == 0) { - - // check whether it is a gap closing a range - if (idx_to > 1 && idx_to < data.size() && data[idx_to-1] == from && data[idx_to] == to) { - data.erase(it_from-1,it_to+1); - return; - } - - // check whether it is connecting to the one on the left - if (idx_to > 1 && data[idx_to-1] == from) { - data[idx_to-1] = to; - return; - } - - // check whether it is connecting to the one on the right - if (idx_to < data.size() && data[idx_to] == to) { - data[idx_to] = from; - return; - } - } - - // check whether it is the end - if (it_from == it_end) { - data.push_back(from); - data.push_back(to); - return; - } - - // check whether it is within an interval - if ((idx_from % 2) == 1) { - return; // nothing to add - } - - // insert new pair at insertion position - data.insert(it_from,2,from); - data[idx_from+1] = to; - - return; - } - - // if from references an existing start value => correct it - if (idx_from % 2 == 0) { - data[idx_from] = from; - ++it_from; - } else { - // all fine - } - - // correct end of last closed interval - if (idx_to % 2 == 0) { - data[idx_to-1] = to; - it_to -= 1; - } else { - // nothing to do here - } - - if (it_from < it_to) data.erase(it_from,it_to); - - } - - /** - * Removes the given interval from the covered range. - * @param from the start (inclusive) of the interval to be removed - * @param to the end (exclusive) of the interval to be removed - */ - void remove(std::size_t from, std::size_t to) { - - // quick exits - if (from >= to) return; - if (data.empty()) return; - - // find positions for from and to - auto it_begin = data.begin(); - auto it_end = data.end(); - - auto it_from = std::upper_bound(it_begin, it_end, from); - auto it_to = std::upper_bound(it_begin, it_end, to-1); - - std::size_t idx_from = std::distance(it_begin,it_from); - std::size_t idx_to = std::distance(it_begin,it_to); - - // in case they are both at the same spot - if (idx_from == idx_to) { - - // if it is between two intervals .. 
- if (idx_from % 2 == 0) return; // .. there is nothing to delete - - // it is within a single interval - assert_eq(1, idx_from % 2); - - // check whether full interval is covered - if (data[idx_from-1] == from && data[idx_to] == to) { - data.erase(it_from-1,it_to+1); - return; - } - - // check if lower boundary matches - if (data[idx_from-1] == from) { - data[idx_from-1] = to; - return; - } - - // check if lower boundary matches - if (data[idx_to] == to) { - data[idx_to] = from; - return; - } - - data.insert(it_from,2,from); - data[idx_from+1] = to; - return; - - } - - if (idx_from % 2 == 1) { - data[idx_from] = from; - it_from++; - } - - if (idx_to % 2 == 1) { - data[idx_to-1] = to; - it_to--; - } - - // delete nodes in-between - data.erase(it_from,it_to); - return; - - } - - /** - * Removes the given intervals from the covered range. - * @param other the intervals to be removed - */ - void remove(const Intervals& other) { - // iteratively remove the elements of the given interval - for(std::size_t i =0; i::min()); - data.insert(data.end(), std::numeric_limits::max()); - - // remove first pair if it is empty - if (data[0] == data[1]) { - for(std::size_t i = 0; i= to) return true; - auto begin = data.begin(); - auto end = data.end(); - auto a = std::upper_bound(begin, end, from); - auto b = std::upper_bound(begin, end, to-1); - return a == b && a != end && ((std::distance(begin,a) % 2) == 1); - } - - /** - * Tests whether any the points within the range [from,...,to) are covered by this intervals. - */ - bool coversAny(std::size_t from, std::size_t to) const { - if (from >= to) return false; - auto begin = data.begin(); - auto end = data.end(); - auto a = std::upper_bound(begin, end, from); - auto b = std::upper_bound(begin, end, to-1); - return a < b || (a == b && a != end && ((std::distance(begin,a) % 2) == 1)); - } - - /** - * Swaps the content of this interval with the given one. 
- */ - void swap(Intervals& other) { - data.swap(other.data); - } - - /** - * Invokes the given function for each index in the covered intervals. - */ - template - void forEach(const Fun& fun) const { - // iterate through the individual intervals - for(std::size_t i =0; i - class LargeArray { - - /** - * A pointer to the first element of the array. - */ - T* data; - - /** - * The size of this large array. - */ - std::size_t size; - - /** - * The list of active ranges in this large array (for which the memory is kept alive). - */ - detail::Intervals active_ranges; - - public: - - /** - * Creates a new large array of the given size. - */ - LargeArray(std::size_t size) : data(nullptr), size(size) { - - // check whether there is something to allocate - if (size == 0) return; - - // allocate the address space - #ifdef _MSC_VER - data = (T*)malloc(sizeof(T)*size); - assert_true(data != nullptr) << "Failed to allocate memory of size" << sizeof(T)*size; - #else - data = (T*)mmap(nullptr,sizeof(T)*size, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, - -1,0 - ); - #endif - assert_ne((void*)-1,(void*)data); - } - - /** - * Explicitly deleted copy constructor. - */ - LargeArray(const LargeArray&) = delete; - - /** - * A move constructor for large arrays. - */ - LargeArray(LargeArray&& other) - : data(other.data), size(other.size), active_ranges(std::move(other.active_ranges)) { - assert_true(other.active_ranges.empty()); - other.data = nullptr; - } - - /** - * Destroys this array. - */ - ~LargeArray() { - - // if there is no data, nothing to do - if (data == nullptr) return; - - // call the destructor for the remaining objects (if required) - if (!std::is_trivially_destructible::value) { - active_ranges.forEach([this](std::size_t i){ - data[i].~T(); - }); - } - - // free the data - #ifdef _MSC_VER - ::free(data); - #else - munmap(data,sizeof(T)*size); - #endif - } - - /** - * Explicitly deleted copy-assignment operator. 
- */ - LargeArray& operator=(const LargeArray&) = delete; - - /** - * Implementation of move assignment operator. - */ - LargeArray& operator=(LargeArray&& other) { - assert_ne(data,other.data); - if (data) { - #ifdef _MSC_VER - ::free(data); - #else - munmap(data, sizeof(T)*size); - #endif - } - std::swap(data,other.data); - size = other.size; - active_ranges.swap(other.active_ranges); - return *this; - } - - bool operator==(const LargeArray& other) const { - // quick check - if (this == &other) return true; - - // check the same size - if (size != other.size) return false; - - // make sure both have allocated all the space - assert_eq(active_ranges, other.active_ranges); - - // compare active ranges - bool res = true; - active_ranges.forEach([&](std::size_t pos){ - res = res && (data[pos] == other.data[pos]); - }); - return res; - } - - /** - * Allocates the given range within this large array. - * After this call, the corresponding sub-range can be accessed. - */ - void allocate(std::size_t start, std::size_t end) { - // check for emptiness - if (start >= end) return; - assert_le(end, size) << "Invalid range " << start << " - " << end << " for array of size " << size; - - - // invoke the constructor for the released objects (if required) - if (!std::is_trivially_constructible::value) { - - // compute the ranges of new elements - auto newElements = detail::Intervals::fromRange(start,end); - newElements.remove(active_ranges); - - - // initialize the newly allocated elements - newElements.forEach([this](std::size_t i){ - new (&data[i]) T(); - }); - } - - // add to active range - active_ranges.add(start,end); - } - - /** - * Frees the given range, thereby deleting the content and freeing the - * associated memory pages. 
- */ - void free(std::size_t start, std::size_t end) { - - // check for emptiness - if (start >= end) return; - assert_le(end, size) << "Invalid range " << start << " - " << end << " for array of size " << size; - - // invoke the destructor for the released objects (if required) - if (!std::is_trivially_destructible::value) { - - // compute the elements to be removed - auto removedElements = detail::Intervals::fromRange(start,end); - removedElements.retain(active_ranges); - - // delete elements to be removed - removedElements.forEach([this](std::size_t i){ - data[i].~T(); // explicit destructor call - }); - - } - - // remove range from active ranges - active_ranges.remove(start,end); - - #ifdef _MSC_VER - // do nothing - #else - // get address of lower boundary - uintptr_t ptr_start = (uintptr_t)(data + start); - uintptr_t ptr_end = (uintptr_t)(data + end); - - auto page_size = getPageSize(); - uintptr_t pg_start = ptr_start - (ptr_start % page_size); - uintptr_t pg_end = ptr_end - (ptr_end % page_size) + page_size; - - std::size_t idx_start = (pg_start - (uintptr_t)(data)) / sizeof(T); - std::size_t idx_end = (pg_end - (uintptr_t)(data)) / sizeof(T); - - assert_le(idx_start,start); - assert_le(end,idx_end); - - if (active_ranges.coversAny(idx_start,start)) pg_start += page_size; - if (active_ranges.coversAny(end,idx_end)) pg_end -= page_size; - pg_end = std::min(pg_end,ptr_end); - - if (pg_start >= pg_end) return; - - - void* section_start = (void*)pg_start; - std::size_t length = pg_end - pg_start; - munmap(section_start, length); - auto res = mmap(section_start, length, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED, - -1,0 - ); - if ((void*)-1 == (void*)res) { - assert_ne((void*)-1,(void*)res); - } - #endif - } - - /** - * Provides mutable access to the element at the given position. - */ - T& operator[](std::size_t pos) { - return data[pos]; - } - - /** - * Provides read-only access to the element at the given position. 
- */ - const T& operator[](std::size_t pos) const { - return data[pos]; - } - - private: - - /** - * Determines the memory page size of the system. - */ - static long getPageSize() { - #ifndef _MSC_VER - static const long PAGE_SIZE = sysconf(_SC_PAGESIZE); - #else - static const long PAGE_SIZE = 0; - #endif - return PAGE_SIZE; - } - - }; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/printer/arrays.h b/vendor/allscale/utils/printer/arrays.h deleted file mode 100644 index 9a1488d5d..000000000 --- a/vendor/allscale/utils/printer/arrays.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const array& data) { - return out << "[" << allscale::utils::join(",", data) << "]"; - } - -} diff --git a/vendor/allscale/utils/printer/join.h b/vendor/allscale/utils/printer/join.h deleted file mode 100644 index 15373b351..000000000 --- a/vendor/allscale/utils/printer/join.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct DefaultElementPrinter { - void operator()(std::ostream& out, const T& value) const { - out << value; - } - }; - - template - class joinable { - - Iter begin; - Iter end; - Sep sep; - ElementPrinter printer; - - public: - - joinable(const Iter& begin, const Iter& end, const Sep& sep, const ElementPrinter& printer = ElementPrinter()) - : begin(begin), end(end), sep(sep), printer(printer) {} - - friend - std::ostream& operator<<(std::ostream& out, const joinable& j) { - if (j.begin == j.end) return out; - Iter cur = j.begin; - j.printer(out, *cur); - cur++; - while(cur != j.end) { - out << j.sep; - j.printer(out, *cur); - cur++; - } - return out; - } - - }; - - } - - - template::value_type> - detail::joinable> join(const char* sep, const Iter& begin, const Iter& end) { - return detail::joinable>(begin,end,sep); 
- } - - template::value_type> - detail::joinable> join(const std::string& sep, const Iter& begin, const Iter& end) { - return detail::joinable>(begin,end,sep); - } - - template - auto join(const Sep& sep, const Container& c) -> decltype(join(sep, c.cbegin(), c.cend())) { - return join(sep, c.cbegin(), c.cend()); - } - - template - detail::joinable join(const char* sep, const Iter& begin, const Iter& end, const Printer& printer) { - return detail::joinable(begin,end,sep,printer); - } - - template - detail::joinable join(const std::string& sep, const Iter& begin, const Iter& end, const Printer& printer) { - return detail::joinable(begin,end,sep,printer); - } - - template - auto join(const Sep& sep, const Container& c, const Printer& p) -> decltype(join(sep, c.cbegin(), c.cend(),p)) { - return join(sep, c.cbegin(), c.cend(),p); - } - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/printer/pairs.h b/vendor/allscale/utils/printer/pairs.h deleted file mode 100644 index 85ca619ea..000000000 --- a/vendor/allscale/utils/printer/pairs.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include -#include - -namespace std { - - template - ostream& operator<<(ostream& out, const pair& data) { - return out << "[" << data.first << "," << data.second << "]"; - } - -} diff --git a/vendor/allscale/utils/printer/set.h b/vendor/allscale/utils/printer/set.h deleted file mode 100644 index 404597103..000000000 --- a/vendor/allscale/utils/printer/set.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const set& data) { - return out << "{" << allscale::utils::join(",", data) << "}"; - } - -} diff --git a/vendor/allscale/utils/printer/vectors.h b/vendor/allscale/utils/printer/vectors.h deleted file mode 100644 index e197f986a..000000000 --- a/vendor/allscale/utils/printer/vectors.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma 
once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const vector& data) { - return out << "[" << allscale::utils::join(",", data) << "]"; - } - -} diff --git a/vendor/allscale/utils/range.h b/vendor/allscale/utils/range.h deleted file mode 100644 index e0b7ed549..000000000 --- a/vendor/allscale/utils/range.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct get_size { - std::size_t operator()(const Iter& a, const Iter& b) { - return std::distance(a,b); - } - }; - - template - struct get_size { - std::size_t operator()(const T* a, const T* b) { - return b - a; - } - }; - } - - - template - struct range { - Iter _begin; - Iter _end; - - Iter begin() const { - return _begin; - } - - Iter end() const { - return _end; - } - - bool empty() const { - return _begin == _end; - } - - std::size_t size() const { - return detail::get_size()(_begin,_end); - } - - const typename std::iterator_traits::value_type& front() const { - return *_begin; - } - - const typename std::iterator_traits::value_type& back() const { - return *(_end - 1); - } - }; - - template - bool operator==(const std::vector& data, const range& range) { - if (data.size() != range.size()) return false; - return std::equal(data.begin(), data.end(), range.begin()); - } - - template - bool operator==(const range& range, const std::vector& data) { - return data == range; - } - - template - bool operator!=(const std::vector& data, const range& range) { - return !(data == range); - } - - template - bool operator!=(const range& range, const std::vector& data) { - return data != range; - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/raw_buffer.h b/vendor/allscale/utils/raw_buffer.h deleted file mode 100644 index a38e54614..000000000 --- a/vendor/allscale/utils/raw_buffer.h 
+++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -namespace allscale { -namespace utils { - - /** - * A utility for interpreting raw buffers. - */ - class RawBuffer { - - char* cur; - - public: - - /** - * Creates a buffer based on the given memory location. - */ - template - RawBuffer(T* base) : cur(reinterpret_cast(base)) {} - - /** - * Consumes an element of type T from the underlying buffer. - */ - template - T& consume() { - return consumeArray(1)[0]; - } - - /** - * Consumes an array of elements of type T form the underlying buffer. - */ - template - T* consumeArray(std::size_t numElements) { - - // check that the given type allows this kind of operations - static_assert( - std::is_trivially_copy_assignable::value || - std::is_trivially_move_assignable::value, - "Invalid reinterpretation of raw data!" - ); - - // 'parse' initial elements - auto res = reinterpret_cast(cur); - // progress position - cur += sizeof(T) * numElements; - // return result - return res; - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/serializer.h b/vendor/allscale/utils/serializer.h deleted file mode 100644 index 92791b8bf..000000000 --- a/vendor/allscale/utils/serializer.h +++ /dev/null @@ -1,500 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/utils/assert.h" - -#if defined(ALLSCALE_WITH_HPX) -#include -#include -#include -#include -#endif - -namespace allscale { -namespace utils { - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - /** - * An archive contains the serialized version of some data structure (fragment). - * It enables the exchange of data between e.g. address spaces. - */ - class Archive; - - /** - * An archive writer is a builder for archives. It is utilized for serializing objects. 
- */ - class ArchiveWriter; - - /** - * An archive reader is a utility to reconstruct data structures from archives. - */ - class ArchiveReader; - - /** - * A serializer describes the way types are converted to and restored from archives. - */ - template - struct serializer; - - /** - * This type trait can be utilized to test whether a given type is serializable, - * thus packable into an archive, or not. - */ - template - struct is_serializable; - - /** - * A facade function for packing an object into an archive. - */ - template - typename std::enable_if::value,Archive>::type - serialize(const T&); - - /** - * A facade function for unpacking an object from an archive. - */ - template - typename std::enable_if::value,T>::type - deserialize(Archive&); - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - - namespace detail { - - /** - * A simple, initial, functionally complete implementation of a data buffer - * for storing data within an archive. - */ - class DataBuffer { - - // check some underlying assumption - static_assert(sizeof(char)==1, "If a char is more than a byte, this implementation needs to be checked."); - - // the actual data store (std::vector handles the dynamic growing for us) - std::vector data; - - public: - - DataBuffer() {} - - DataBuffer(const DataBuffer&) = default; - DataBuffer(DataBuffer&&) = default; - - DataBuffer(const std::vector& data) : data(data) {} - DataBuffer(std::vector&& data) : data(std::move(data)) {} - - DataBuffer& operator=(const DataBuffer&) = default; - DataBuffer& operator=(DataBuffer&&) = default; - - /** - * The main function for appending data to this buffer. 
- */ - void append(const char* start, std::size_t count) { - // create space - auto pos = data.size(); - data.resize(pos + count / sizeof(char)); - - // append at end - std::memcpy(&data[pos],start,count); - - } - - /** - * Obtains the number of bytes this buffer is occupying. - */ - std::size_t size() const { - return data.size() * sizeof(char); - } - - /** - * Obtains a pointer to the begin of the internally maintained buffer (inclusive). - */ - const char* begin() const { - return &data.front(); - } - - /** - * Obtains a pointer to the end of the internally maintained buffer (exclusive). - */ - const char* end() const { - return &data.back() + 1; - } - - /** - * Support implicit conversion of this buffer to a vector of characters. - */ - operator const std::vector&() const { - return data; - } - - /** - * Also enable the implicit hand-off of the ownership of the underlying char store. - */ - operator std::vector() && { - return std::move(data); - } - - - }; - - } // end namespace detail - - - class Archive { - - friend class ArchiveWriter; - friend class ArchiveReader; - - // the data represented by this archive - detail::DataBuffer data; - - Archive(detail::DataBuffer&& data) - : data(std::move(data)) {} - - public: - - - - Archive(const Archive&) = default; - Archive(Archive&&) = default; - - Archive(const std::vector& buffer) : data(buffer) {} - Archive(std::vector&& buffer) : data(std::move(buffer)) {} - - Archive& operator=(const Archive&) = default; - Archive& operator=(Archive&&) = default; - - /** - * Support implicit conversion of this archive to a vector of characters. - */ - operator const std::vector&() const { - return data; - } - - /** - * Also enable the implicit hand-off of the ownership of the underlying buffer. - */ - operator std::vector() && { - return std::move(data); - } - - /** - * Provide explicit access to the underlying char buffer. 
- */ - const std::vector& getBuffer() const { - return data; - } - }; - -#if !defined(ALLSCALE_WITH_HPX) - class ArchiveWriter { - - // the buffer targeted by this archive writer - detail::DataBuffer data; - - public: - - ArchiveWriter() {} - - ArchiveWriter(const ArchiveWriter&) = delete; - ArchiveWriter(ArchiveWriter&&) = default; - - ArchiveWriter& operator=(const ArchiveWriter&) = delete; - ArchiveWriter& operator=(ArchiveWriter&&) = default; - - /** - * Appends a given number of bytes to the end of the underlying data buffer. - */ - void write(const char* src, std::size_t count) { - data.append(src,count); - } - - /** - * A utility function wrapping the invocation of the serialization mechanism. - */ - template - std::enable_if_t::value,void> - write(const T& value) { - // use serializer to store object of this type - serializer::store(*this,value); - } - - /** - * Obtains the archive produces by this writer. After the call, - * this writer must not be used any more. - */ - Archive toArchive() && { - return std::move(data); - } - - }; -#else - class ArchiveWriter { - hpx::serialization::output_archive &ar_; - - public: - ArchiveWriter(hpx::serialization::output_archive &ar) : ar_(ar) {} - - /** - * Appends a given number of bytes to the end of the underlying data buffer. - */ - void write(const char* src, std::size_t count) { - ar_ & hpx::serialization::make_array(src, count); - } - - /** - * A utility function wrapping the invocation of the serialization mechanism. 
- */ - template - std::enable_if_t::value,void> - write(const T& value) { -// // use serializer to store object of this type - serializer::store(*this,value); - } - - template - std::enable_if_t::value,void> - write(const T& value) { - ar_ & value; - } - }; -#endif - -#if !defined(ALLSCALE_WITH_HPX) - class ArchiveReader { - - // the current point of the reader - const char* cur; - - // the end of the reader (only checked for debugging) - const char* end; - - public: - - /** - * A archive reader can only be obtained from an existing archive. - */ - ArchiveReader(const Archive& archive) - : cur(archive.data.begin()), end(archive.data.end()) {} - - ArchiveReader(const ArchiveReader&) = delete; - ArchiveReader(ArchiveReader&&) = default; - - ArchiveReader& operator=(const ArchiveReader&) = delete; - ArchiveReader& operator=(ArchiveReader&&) = default; - - /** - * Reads a number of bytes from the underlying buffer. - */ - void read(char* dst, std::size_t count) { - // copy the data - std::memcpy(dst,cur,count); - // move pointer forward - cur += count; - - // make sure that we did not cross the end of the buffer - assert_le(cur,end); - } - - /** - * A utility function wrapping up the de-serialization of an object - * of type T from the underlying buffer. - */ - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - return serializer::load(*this); - } - - }; -#else - class ArchiveReader { - hpx::serialization::input_archive &ar_; - - public: - ArchiveReader(hpx::serialization::input_archive &ar) : ar_(ar) {} - - /** - * Reads a number of bytes from the underlying buffer. - */ - void read(char* dst, std::size_t count) { - ar_ & hpx::serialization::make_array(dst, count); - } - - /** - * A utility function wrapping up the de-serialization of an object - * of type T from the underlying buffer. 
- */ - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - return serializer::load(*this); - } - - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - T t; - ar_ & t; - return t; - } - }; -#endif - - - /** - * Adds support for the serialization to every type T supporting - * - * - a static member function T load(ArchiveReader&) - * - a member function void store(ArchiveWriter&) - * - * Thus, serialization / deserialization can be integrated through member functions. - */ - template - struct serializer())),T>::value && - // ... and a store member function - std::is_same().store(std::declval())),void>::value, - void>::type> { - - static T load(ArchiveReader& a) { - return T::load(a); - } - static void store(ArchiveWriter& a, const T& value) { - value.store(a); - } - }; - - - /** - * Enables the skipping of const qualifiers for types. - * Also const values can be serialized and deserialized if requested. - */ - template - struct serializer::value, - void>::type> : public serializer {}; - - - - // -- primitive type serialization -- - - namespace detail { - - /** - * A helper functor for serializing primitive types. 
- */ - template - struct primitive_serializer { - static T load(ArchiveReader& reader) { - T res = 0; - reader.read(reinterpret_cast(&res),sizeof(T)); - return res; - } - static void store(ArchiveWriter& writer, const T& value) { - writer.write(reinterpret_cast(&value),sizeof(T)); - } - }; - - } // end namespace detail - - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - - template - struct is_serializable : public std::false_type {}; - - template - struct is_serializable::load)), T(*)(Archive&)>::value && - std::is_same::store)), void(*)(Archive&, const T&)>::value, - void>::type> : public std::true_type {}; - - - - // -- facade functions -- -#if !defined(ALLSCALE_WITH_HPX) - template - typename 
std::enable_if::value,Archive>::type - serialize(const T& value) { - ArchiveWriter writer; - writer.write(value); - return std::move(writer).toArchive(); - } - - template - typename std::enable_if::value,T>::type - deserialize(Archive& a) { - return ArchiveReader(a).read(); - } -#endif - -} // end namespace utils -} // end namespace allscale - -#if defined(ALLSCALE_WITH_HPX) -namespace hpx { -namespace serialization { - template - typename std::enable_if< - ::allscale::utils::is_serializable::value && - !(std::is_integral::value || std::is_floating_point::value), - output_archive& - >::type - serialize(output_archive & ar, T const & t, int) { - allscale::utils::ArchiveWriter writer(ar); - writer.write(t); - return ar; - } - - template - typename std::enable_if< - ::allscale::utils::is_serializable::value && - !(std::is_integral::value || std::is_floating_point::value), - input_archive& - >::type - serialize(input_archive & ar, T & t, int) { - - allscale::utils::ArchiveReader reader(ar); - t = reader.read(); - return ar; - } -} // end namespace serialization -} // end namespace allscale -#endif diff --git a/vendor/allscale/utils/serializer/arrays.h b/vendor/allscale/utils/serializer/arrays.h deleted file mode 100644 index 079405334..000000000 --- a/vendor/allscale/utils/serializer/arrays.h +++ /dev/null @@ -1,60 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX - #include -#endif - -#include "allscale/utils/serializer.h" - -#include - -namespace allscale { -namespace utils { - - - namespace detail { - - template - struct array_load_helper { - - template - std::array operator()(ArchiveReader& reader, Args&& ... args) { - return array_load_helper()(reader,args...,reader.read()); - } - }; - - template - struct array_load_helper { - - template - std::array operator()(ArchiveReader&, Args&& ... args) { - return std::array{ - { args... } - }; - } - - }; - - } - - - /** - * Add support for serializing / de-serializing arrays. 
- */ - template - struct serializer,typename std::enable_if::value,void>::type> { - - static std::array load(ArchiveReader& reader) { - // support loading of array for elements without default constructor - return detail::array_load_helper()(reader); - } - static void store(ArchiveWriter& writer, const std::array& value) { - for(const auto& cur : value) { - writer.write(cur); - } - } - }; - -} // end namespace utils -} // end namespace allscale - diff --git a/vendor/allscale/utils/serializer/strings.h b/vendor/allscale/utils/serializer/strings.h deleted file mode 100644 index 619b59efd..000000000 --- a/vendor/allscale/utils/serializer/strings.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX -#include -#endif - -#include "allscale/utils/serializer.h" - -#include - -namespace allscale { -namespace utils { - - /** - * Add support for serializing / de-serializing strings. - */ - template<> - struct serializer { - - static std::string load(ArchiveReader& reader) { - auto size = reader.read(); - std::string res; - res.resize(size); - reader.read(&res[0],size); - return res; - } - static void store(ArchiveWriter& writer, const std::string& value) { - writer.write(value.size()); - writer.write(&value[0],value.size()); - } - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/serializer/vectors.h b/vendor/allscale/utils/serializer/vectors.h deleted file mode 100644 index 81870ed35..000000000 --- a/vendor/allscale/utils/serializer/vectors.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX - #include "allscale/utils/serializer.h" -#endif - -#include - -#include "allscale/utils/serializer/arrays.h" - -namespace allscale { -namespace utils { - - /** - * Add support for serializing / de-serializing std::vectors. 
- */ - template - struct serializer,typename std::enable_if::value,void>::type> { - - static std::vector load(ArchiveReader& reader) { - - // create the result - std::vector res; - - // load the size - auto size = reader.read(); - - // make some space - res.reserve(size); - - // load the elements - for(std::size_t i=0; i()); - } - - // done - return res; - } - static void store(ArchiveWriter& writer, const std::vector& value) { - - // start with the size - writer.write(value.size()); - - // followed by all the elements - for(const auto& cur : value) { - writer.write(cur); - } - } - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/static_grid.h b/vendor/allscale/utils/static_grid.h deleted file mode 100644 index 582282e28..000000000 --- a/vendor/allscale/utils/static_grid.h +++ /dev/null @@ -1,247 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/functional_utils.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace utils { - - template - struct StaticGrid; - - template - struct StaticGrid { - using data_type = std::array,a>; - using addr_type = utils::Vector; - - private: - - data_type data; - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - std::memcpy(&data,&other.data,sizeof(data_type)); - } - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - data = other.data; - } - - public: - - StaticGrid& operator=(const StaticGrid& other) { - if (this == &other) return *this; - assignInternal(other); - return *this; - } - - Cell& operator[](const addr_type& addr) { - return this->template operator[](addr); - } - - const Cell& operator[](const addr_type& addr) const { - return this->template operator[](addr); - } - - template - Cell& operator[](const utils::Vector& addr) { - allscale_check_bounds((size_t)addr[D - sizeof...(rest)-1], data); - 
return data[addr[D-sizeof...(rest)-1]][addr]; - } - - template - const Cell& operator[](const utils::Vector& addr) const { - allscale_check_bounds((size_t)addr[D - sizeof...(rest)-1], data); - return data[addr[D-sizeof...(rest)-1]][addr]; - } - - utils::Vector size() const { - return { a, rest... }; - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) const { - for(const auto& cur : data) { - cur.forEach(lambda); - } - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) { - for(auto& cur : data) { - cur.forEach(lambda); - } - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) const { - addr_type pos; - _forEachInternal(pos,lambda); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) { - addr_type pos; - _forEachInternal(pos,lambda); - } - - void store(utils::ArchiveWriter& writer) const { - for(const auto& e : data) { - writer.write(e); - } - } - - static StaticGrid load(utils::ArchiveReader& reader) { - StaticGrid grid; - for(auto& e : grid.data) { - e = reader.read(); - } - return grid; - } - - private: - - template - friend struct StaticGrid; - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) const { - auto& i = pos[D-sizeof...(rest)-1]; - i = 0; - for(const auto& cur : data) { - cur._forEachInternal(pos,lambda); - i++; - } - } - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) { - auto& i = pos[D-sizeof...(rest)-1]; - i = 0; - for(auto& cur : data) { - cur._forEachInternal(pos,lambda); - i++; - } - } - - }; - - template - struct StaticGrid { - using data_type = Cell; - using addr_type = utils::Vector; - - private: - - data_type data; - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - std::memcpy(&data,&other.data,sizeof(data_type)); - } - - template - 
typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - data = other.data; - } - - public: - - StaticGrid& operator=(const StaticGrid& other) { - if (this == &other) return *this; - assignInternal(other); - return *this; - } - - Cell& operator[](const addr_type& addr) { - return this->template operator[]<0>(addr); - } - - const Cell& operator[](const addr_type& addr) const { - return this->template operator[]<0>(addr); - } - - template - Cell& operator[](const utils::Vector&) { - return data; - } - - template - const Cell& operator[](const utils::Vector&) const { - return data; - } - - std::size_t size() const { - return 1; - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) const { - lambda(data); - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) { - lambda(data); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) const { - lambda(addr_type(),data); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) { - lambda(addr_type(),data); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - } - - static StaticGrid load(utils::ArchiveReader& reader) { - StaticGrid grid; - grid.data = std::move(reader.read()); - return grid; - } - - private: - - template - friend struct StaticGrid; - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) const { - lambda(const_cast&>(pos),data); - } - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) { - lambda(const_cast&>(pos),data); - } - - }; - -} // end utils -} // end namespace allscale diff --git a/vendor/allscale/utils/static_map.h b/vendor/allscale/utils/static_map.h deleted file mode 100644 index 44e5c6083..000000000 --- a/vendor/allscale/utils/static_map.h +++ /dev/null @@ -1,120 +0,0 @@ -#pragma once - -#include 
-#include - -#include "allscale/utils/type_list.h" - -namespace allscale { -namespace utils { - - // -------------------------------------------------------------------- - // Declarations - // -------------------------------------------------------------------- - - - /** - * A static map mapping a given value to each of a given list of types. - */ - template - class StaticMap; - - /** - * An auxiliary type for forming lists of keys. - */ - template - struct keys {}; - - - // -------------------------------------------------------------------- - // Definitions - // -------------------------------------------------------------------- - - namespace key_utils { - - template - struct is_keys : public std::false_type {}; - - template - struct is_keys> : public std::true_type {}; - - template - struct invalid_key : public std::false_type {}; - } - - template - class StaticMap { - - static_assert(key_utils::is_keys::value, "First template parameters must be of form keys<...>"); - - }; - - - template - class StaticMap,Value> { - - using key_list = type_list; - - std::array values; - - public: - - // -- accessors and mutators -- - - StaticMap(const Value& value) { - for(auto& cur : values) cur = value; - } - - StaticMap() = default; - StaticMap(const StaticMap&) = default; - StaticMap(StaticMap&&) = default; - - StaticMap& operator=(const StaticMap&) = default; - StaticMap& operator=(StaticMap&&) = default; - - // -- accessors and mutators -- - - template - Value& get() { - return values[type_index::value]; - } - - template - const Value& get() const { - return values[type_index::value]; - } - - auto begin() { - return values.begin(); - } - - auto begin() const { - return values.begin(); - } - - auto end() { - return values.end(); - } - - auto end() const { - return values.end(); - } - - template - void forEach(const Body& body) { - for(auto& cur : values) { - body(cur); - } - } - - template - void forEach(const Body& body) const { - for(const auto& cur : values) { - 
body(cur); - } - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/string_utils.h b/vendor/allscale/utils/string_utils.h deleted file mode 100644 index 439931fa5..000000000 --- a/vendor/allscale/utils/string_utils.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include -#include -#include - -template -std::string toString(const T& value) { - std::stringstream res; - res << value; - return res.str(); -} diff --git a/vendor/allscale/utils/table.h b/vendor/allscale/utils/table.h deleted file mode 100644 index 72dfc850a..000000000 --- a/vendor/allscale/utils/table.h +++ /dev/null @@ -1,243 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/io_utils.h" -#include "allscale/utils/raw_buffer.h" -#include "allscale/utils/printer/join.h" - -namespace allscale { -namespace utils { - - /** - * A container for a const-sized array of elements, which may or may - * not be owned by instances of this type. 
- */ - template - class Table { - - std::size_t length; - - T* data; - - bool owned; - - public: - - using const_iterator = const T*; - using iterator = T*; - - Table() - : length(0), data(nullptr), owned(false) {} - - Table(std::size_t size) - : length(size), data(allocate(length)), owned(true) { - - // see whether there is something to do - if (std::is_trivially_default_constructible::value) return; - - // use in-place default constructor - for(auto& cur : *this) { - new (&cur) T(); - } - } - - Table(std::size_t size, const T& value) - : length(size), data(allocate(length)), owned(true) { - // use in-place copy constructor - for(auto& cur : *this) { - new (&cur) T(value); - } - } - - Table(T* data, std::size_t size) - : length(size), data(data), owned(false) {} - - Table(T* begin, T* end) - : Table(begin,std::distance(begin,end)) {} - - - Table(const Table& other) - : length(other.length), - data(allocate(length)), - owned(true) { - - // see whether there is something to do - if (length > 0 && std::is_trivially_copy_constructible::value) { - std::memcpy(data,other.data,sizeof(T)*length); - return; - } - - // use in-place constructor to copy data - for(std::size_t i=0; i::value) { - for(auto& cur : *this) { - cur.~T(); - } - } - - // free the owned memory - free(data); - } - - - Table& operator=(const Table& other) { - - // shortcut for stupid stuff - if (this == &other) return *this; - - // free old state - this->~Table(); - - // create a copy of the new state - new (this) Table(other); - - // done - return *this; - } - - Table& operator=(Table&& other) { - - // shortcut for stupid stuff - assert_ne(this,&other) << "Should not be possible!"; - - // free old state - this->~Table(); - - // create a copy of the new state - new (this) Table(std::move(other)); - - // done - return *this; - } - - bool empty() const { - return length == 0; - } - - std::size_t size() const { - return length; - } - - T& operator[](std::size_t i) { - return data[i]; - } - - const T& 
operator[](std::size_t i) const { - return data[i]; - } - - const_iterator begin() const { - return data; - } - - const_iterator cbegin() const { - return data; - } - - iterator begin() { - return data; - } - - const_iterator end() const { - return data + length; - } - - const_iterator cend() const { - return data + length; - } - - iterator end() { - return data + length; - } - - bool isOwner() const { - return owned; - } - - friend std::ostream& operator<<(std::ostream& out, const Table& table) { - return out << "[" << join(",",table) << "]"; - } - - void store(std::ostream& out) const { - // write length and data - write(out,length); - write(out,data,data+length); - - // write padding bytes - forEachPaddingByte([&]{ - write(out,(char)0); - }); - - } - - static Table load(std::istream& in) { - - Table res; - - res.owned = true; - res.length = read(in); - res.data = allocate(res.length); - read(in,res.begin(),res.end()); - - // consume padding bytes - res.forEachPaddingByte([&]{ - read(in); - }); - - return res; - } - - static Table interpret(utils::RawBuffer& buffer) { - - Table res; - res.owned = false; - res.length = buffer.consume(); - res.data = buffer.consumeArray(res.length); - - // consume padding bytes - res.forEachPaddingByte([&]{ - buffer.consume(); - }); - - return res; - - } - - private: - - static T* allocate(std::size_t size) { - if (size == 0) return nullptr; - return reinterpret_cast(malloc(sizeof(T)*size)); - } - - template - void forEachPaddingByte(const Body& body) const { - auto c = (sizeof(T)*length) % 8; - while(c%8 != 0) { - body(); - c++; - } - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/tuple_utils.h b/vendor/allscale/utils/tuple_utils.h deleted file mode 100644 index 5bf8ede2c..000000000 --- a/vendor/allscale/utils/tuple_utils.h +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct 
tuple_for_each_helper { - template - void operator()(const Op& op, std::tuple& tuple) { - tuple_for_each_helper()(op,tuple); - op(std::get(tuple)); - } - template - void operator()(const Op& op, const std::tuple& tuple) { - tuple_for_each_helper()(op,tuple); - op(std::get(tuple)); - } - }; - - template<> - struct tuple_for_each_helper<0> { - template - void operator()(const Op&, const std::tuple&) { - // nothing - } - }; - - } - - /** - * A utility to apply an operator on all elements of a tuple in order. - * - * @param tuple the (mutable) tuple - * @param op the operator to be applied - */ - template - void forEach(std::tuple& tuple, const Op& op) { - detail::tuple_for_each_helper()(op,tuple); - } - - /** - * A utility to apply an operator on all elements of a tuple in order. - * - * @param tuple the (constant) tuple - * @param op the operator to be applied - */ - template - void forEach(const std::tuple& tuple, const Op& op) { - detail::tuple_for_each_helper()(op,tuple); - } - - namespace detail { - - template - auto map_helper(const std::tuple& in, const Op& op, std::integer_sequence) { - return std::make_tuple(op(std::get(in))...); - } - - template - auto map_helper(std::tuple& in, const Op& op, std::integer_sequence) { - return std::make_tuple(op(std::get(in))...); - } - - } - - /** - * A utility to apply a transformation on each element of a given tuple and return a a tuple containing - * the results. - * - * @param tuple the (constant) input tuple - * @param op the operation to be applied on each element of the tuple - */ - template - auto map(const std::tuple& tuple, const Op& op) { - return detail::map_helper(tuple,op,std::make_integer_sequence()); - } - - /** - * A utility to apply a transformation on each element of a given tuple and return a a tuple containing - * the results. 
- * - * @param tuple the (mutable) input tuple - * @param op the operation to be applied on each element of the tuple - */ - template - auto map(std::tuple& tuple, const Op& op) { - return detail::map_helper(tuple,op,std::make_integer_sequence()); - } - -} // end namespace utils -} // end namespace allscale - -namespace std { - - template - std::ostream& operator<<(std::ostream& out, const std::tuple& tuple) { - out << "("; - std::size_t count = 0; - const std::size_t numElements = sizeof...(Elements); - allscale::utils::forEach(tuple,[&](const auto& cur) { - out << cur; - count++; - if (count != numElements) out << ","; - }); - return out << ")"; - } - -} diff --git a/vendor/allscale/utils/type_list.h b/vendor/allscale/utils/type_list.h deleted file mode 100644 index 91caa3857..000000000 --- a/vendor/allscale/utils/type_list.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - - // -------------------- Type List traits ---------------------------- - - template - struct type_list { - enum { length = sizeof...(Ts) }; - enum { empty = (length == 0) }; - }; - - - // -- test whether a given list contains a given type -- - - template - struct type_list_contains; - - template - struct type_list_contains> : public std::true_type {}; - - template - struct type_list_contains> : public type_list_contains> {}; - - template - struct type_list_contains> : public std::false_type {}; - - - // -- extracts a type at a given position -- - - template - struct type_at; - - template - struct type_at<0, type_list> { - typedef H type; - }; - - template - struct type_at> { - typedef typename type_at>::type type; - }; - - - // -- obtains the index of a given type -- - - template - struct type_index; - - template - struct type_index> { - enum { value = 0 }; - }; - - template - struct type_index> { - enum { value = type_index>::value + 1 }; - }; - - -} // end namespace utils -} // end namespace allscale diff --git 
a/vendor/allscale/utils/unused.h b/vendor/allscale/utils/unused.h deleted file mode 100644 index 7e696122f..000000000 --- a/vendor/allscale/utils/unused.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -/** - * This header defines a macro to mark knowingly unused variables as being - * unused, so that the compiler is not issuing warnings about those. - */ - -#ifdef __GNUC__ - #define __allscale_unused __attribute__((unused)) -#else - #define __allscale_unused -#endif diff --git a/vendor/allscale/utils/vector.h b/vendor/allscale/utils/vector.h deleted file mode 100644 index 611d9ea4b..000000000 --- a/vendor/allscale/utils/vector.h +++ /dev/null @@ -1,415 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/printer/arrays.h" -#include "allscale/utils/assert.h" -#include "allscale/utils/unused.h" -#include "allscale/utils/serializer/arrays.h" - -namespace allscale { -namespace utils { - - // generic vector implementation - template - class Vector { - - std::array data; - - public: - - using element_type = T; - - Vector() = default; - - Vector(const T& e) { - data.fill(e); - } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) - : data(other.data) {} - - template - Vector(const std::array& other) - : data(other) {} - - Vector(const std::initializer_list& values) { - assert_eq(Dims,values.size()); - init(values); - } - - template - Vector(T a, T b, Rest ... 
rest) : data{ {a,b,rest...} } { - static_assert(Dims == sizeof...(rest)+2, "Invalid number of components!"); - } - - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - T& operator[](const std::size_t index) { - return data[index]; - } - - const T& operator[](const std::size_t index) const { - return data[index]; - } - - // relational operators - // defined in-class, since the private std::array data member has matching operators to forward to - - bool operator==(const Vector& other) const { - return data == other.data; - } - - bool operator!=(const Vector& other) const { - return !(data == other.data); - } - - bool operator<(const Vector& other) const { - return data < other.data; - } - - bool operator<=(const Vector& other) const { - return data <= other.data; - } - - bool operator>=(const Vector& other) const { - return data >= other.data; - } - - bool operator>(const Vector& other) const { - return data > other.data; - } - - // allow implicit casts to std::array - operator const std::array&() const { return data; } - - bool dominatedBy(const Vector& other) const { - for(std::size_t i=0; i& other) const { - for(std::size_t i=0; i - void init_internal(const std::initializer_list& list, const std::integer_sequence&) { - __allscale_unused auto bla = { data[Index] = *(list.begin() + Index) ... 
}; - } - - template - void init(const std::initializer_list& list) { - init_internal(list,std::make_index_sequence()); - } - - }; - - template - Vector& operator+=(Vector& a, const Vector& b) { - for(std::size_t i = 0; i - Vector& operator-=(Vector& a, const Vector& b) { - for(size_t i = 0; i - Vector& operator*=(Vector& a, const S& fac) { - for(size_t i =0; i - Vector& operator/=(Vector& a, const S& fac) { - for(size_t i =0; i - Vector operator+(const Vector& a, const Vector& b) { - Vector res(a); - return res += b; - } - - template - Vector operator-(const Vector& a, const Vector& b) { - Vector res(a); - return res -= b; - } - - template - Vector operator*(const Vector& vec, const S& fac) { - Vector res(vec); - return res *= fac; - } - - template - Vector operator*(const S& fac, const Vector& vec) { - return vec * fac; - } - - template - Vector operator/(const Vector& vec, const S& fac) { - Vector res(vec); - return res /= fac; - } - - template - Vector elementwise(const Vector& a, const Vector& b, const Lambda& op) { - Vector res; - for(unsigned i=0; i - Vector elementwiseMin(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return std::min(a,b); }); - } - - template - Vector elementwiseMax(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return std::max(a,b); }); - } - - template - Vector elementwiseProduct(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a*b; }); - } - - template - Vector elementwiseDivision(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a/b; }); - } - - template - Vector elementwiseRemainder(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a % b; }); - } - - template - Vector elementwiseModulo(const Vector& a, const Vector& b) { - return elementwiseRemainder(a,b); - } - - - template - T sumOfSquares(const Vector& vec) { 
- T sum = T(); - for(unsigned i = 0; i < Dims; i++) { - sum += vec[i] * vec[i]; - } - return sum; - } - - // specialization for 3-dimensional vectors, providing access to named data members x, y, z - template - class Vector { - public: - - using element_type = T; - - T x, y, z; - - Vector() = default; - - Vector(const T& e) : x(e), y(e), z(e) { } - - Vector(T x, T y, T z) : x(x), y(y), z(z) { } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) : x(other.x), y(other.y), z(other.z) {} - - template - Vector(const std::array& other) : x(other[0]), y(other[1]), z(other[2]) {} - - T& operator[](std::size_t i) { - return (i==0) ? x : (i==1) ? y : z; - } - - const T& operator[](std::size_t i) const { - return (i==0) ? x : (i==1) ? y : z; - } - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - bool operator==(const Vector& other) const { - return std::tie(x,y,z) == std::tie(other.x,other.y,other.z); - } - - bool operator!=(const Vector& other) const { - return !(*this == other); - } - - bool operator<(const Vector& other) const { - return asArray() < other.asArray(); - } - - bool operator<=(const Vector& other) const { - return asArray() <= other.asArray(); - } - - bool operator>=(const Vector& other) const { - return asArray() >= other.asArray(); - } - - bool operator>(const Vector& other) const { - return asArray() > other.asArray(); - } - - operator const std::array&() const { return asArray(); } - - const std::array& asArray() const { - return reinterpret_cast&>(*this); - } - - bool dominatedBy(const Vector& other) const { - return other.x >= x && other.y >= y && other.z >= z; - } - - bool strictlyDominatedBy(const Vector& other) const { - return other.x > x && other.y > y && other.z > z; - } - - // Adds printer support to this vector. 
- friend std::ostream& operator<<(std::ostream& out, const Vector& vec) { - return out << "[" << vec.x << "," << vec.y << "," << vec.z << "]"; - } - - }; - - template - Vector crossProduct(const Vector& a, const Vector& b) { - return Vector { - a[1] * b[2] - a[2] * b[1], - a[2] * b[0] - a[0] * b[2], - a[0] * b[1] - a[1] * b[0] - }; - } - - // specialization for 2-dimensional vectors, providing access to named data members x, y - template - class Vector { - public: - - using element_type = T; - - T x, y; - - Vector() = default; - - Vector(const T& e) : x(e), y(e) { } - - Vector(T x, T y) : x(x), y(y) { } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) : x(other.x), y(other.y) {} - - template - Vector(const std::array& other) : x(other[0]), y(other[1]) {} - - T& operator[](std::size_t i) { - return (i == 0) ? x : y; - } - - const T& operator[](std::size_t i) const { - return (i == 0) ? x : y; - } - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - bool operator==(const Vector& other) const { - return asArray() == other.asArray(); - } - - bool operator!=(const Vector& other) const { - return !(*this == other); - } - - bool operator<(const Vector& other) const { - return asArray() < other.asArray(); - } - - bool operator<=(const Vector& other) const { - return asArray() <= other.asArray(); - } - - bool operator>=(const Vector& other) const { - return asArray() >= other.asArray(); - } - - bool operator>(const Vector& other) const { - return asArray() > other.asArray(); - } - - operator const std::array&() const { return asArray(); } - - const std::array& asArray() const { - return reinterpret_cast&>(*this); - } - - bool dominatedBy(const Vector& other) const { - return other.x >= x && other.y >= y; - } - - bool strictlyDominatedBy(const Vector& other) const { - return other.x > x && other.y > y; - } - - // Adds printer support to this vector. 
- friend std::ostream& operator<<(std::ostream& out, const Vector& vec) { - return out << "[" << vec.x << "," << vec.y << "]"; - } - - }; - - /** - * Add support for serializing / de-serializing Vector instances. - * The implementation is simply re-using the serializing capabilities of arrays. - */ - template - struct serializer,typename std::enable_if::value,void>::type> : public serializer> {}; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/vector_utils.h b/vendor/allscale/utils/vector_utils.h deleted file mode 100644 index 7707eac06..000000000 --- a/vendor/allscale/utils/vector_utils.h +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - namespace { - - /** - * The terminal case of a function where a variable number of arguments is written into a vector in proper order. - * - * @tparam T the element type maintained within the extended vector - * @param vector the vector to which nothing is written to - */ - template - inline void appendToVector(std::vector&) {} - - /** - * A variable-argument function writing elements into a vector in the given order. - * - * @tparam T the type of element maintained within the modified vector - * @tparam Elements the types of the remaining elements (need to be convertible to T) - * @param vector the vector to be written to - * @param first the next element to be added - * @param rest the remaining elements to be added - */ - template - inline void appendToVector(std::vector& vector, const T& first, const Elements& ... rest) { - vector.push_back(first); - appendToVector(vector, rest...); - } - - } - - /** - * Create an empty vector containing no elements. - * - * @tparam T the type of element to be stored in the resulting vector - * @return the resulting vector - */ - template - inline std::vector toVector() { - return std::vector (); - } - - /** - * Creates a vector containing the given elements. 
- * - * @tparam T the type of element to be stored in the resulting vector - * @tparam Elements the types of the remaining elements (need to be convertible to T) - * @param first the first element to be within the list - * @param rest the remaining elements to be stored within the list - * @return the resulting vector - */ - template - inline std::vector toVector(const T& first, const Elements& ... rest) { - std::vector res; - res.reserve(1 + sizeof...(rest)); - appendToVector(res, first, rest...); - return res; - } - - - template - struct is_vector : public std::false_type {}; - - template - struct is_vector> : public std::true_type {}; - - template - struct is_vector : public is_vector {}; - - template - struct is_vector : public is_vector {}; - -} // end namespace utils -} // end namespace allscale From 9e1f7dfe2fc73e2788ab7dc312c002e68a3f8e8d Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Mon, 14 Aug 2023 17:38:54 +0200 Subject: [PATCH 07/17] Move grid benchmarks to benchmark executable --- test/CMakeLists.txt | 16 +- test/{benchmarks.cc => dag_benchmarks.cc} | 0 test/grid_benchmarks.cc | 193 ++++++++++++ test/grid_test_utils.cc | 112 +++++++ test/grid_test_utils.h | 22 ++ test/grid_tests.cc | 356 ++-------------------- 6 files changed, 359 insertions(+), 340 deletions(-) rename test/{benchmarks.cc => dag_benchmarks.cc} (100%) create mode 100644 test/grid_benchmarks.cc create mode 100644 test/grid_test_utils.cc create mode 100644 test/grid_test_utils.h diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d4774ca2c..0d03c8c2e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,8 +47,8 @@ set(TEST_TARGETS device_selection_tests ) -add_library(test_main test_main.cc) -set_test_target_parameters(test_main test_main.cc) +add_library(test_main test_main.cc grid_test_utils.cc) +set_test_target_parameters(test_main test_main.cc grid_test_utils.cc) set(TEST_OBJ_LIST "") foreach(TEST_TARGET ${TEST_TARGETS}) @@ -70,17 +70,15 @@ foreach(TEST_TARGET 
${TEST_TARGETS}) ParseAndAddCatchTests_ParseFile(${TEST_SOURCE} ${TEST_TARGET}) endforeach() -target_sources(grid_tests PRIVATE benchmark_reporters.cc) # TODO HACK - # Add all_tests executable add_executable(all_tests ${TEST_OBJ_LIST}) target_link_libraries(all_tests PRIVATE test_main) set_test_target_parameters(all_tests "") # Unit benchmark executable -add_executable(benchmarks benchmarks.cc system_benchmarks.cc benchmark_reporters.cc) +add_executable(benchmarks dag_benchmarks.cc grid_benchmarks.cc system_benchmarks.cc benchmark_reporters.cc) target_link_libraries(benchmarks PRIVATE test_main) -set_test_target_parameters(benchmarks benchmarks.cc system_benchmarks.cc) +set_test_target_parameters(benchmarks dag_benchmarks.cc grid_benchmarks.cc system_benchmarks.cc) add_subdirectory(system) if(CELERITY_DETAIL_INTEGRATION_TESTING) @@ -100,9 +98,9 @@ find_library(CAIRO_LIBRARIES if(CAIRO_INCLUDE_DIRS AND CAIRO_LIBRARIES) message(STATUS "Building tests with cairo support: ${CAIRO_LIBRARIES}") - foreach(TEST_TARGET region_map_tests;grid_tests) - target_compile_definitions(${TEST_TARGET}_OBJ PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) - target_include_directories(${TEST_TARGET}_OBJ PRIVATE ${CAIRO_INCLUDE_DIRS}) + foreach(TEST_TARGET region_map_tests;region_map_tests_OBJ;test_main) + target_compile_definitions(${TEST_TARGET} PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) + target_include_directories(${TEST_TARGET} PRIVATE ${CAIRO_INCLUDE_DIRS}) target_link_libraries(${TEST_TARGET} PRIVATE ${CAIRO_LIBRARIES}) endforeach() target_link_libraries(all_tests PRIVATE ${CAIRO_LIBRARIES}) diff --git a/test/benchmarks.cc b/test/dag_benchmarks.cc similarity index 100% rename from test/benchmarks.cc rename to test/dag_benchmarks.cc diff --git a/test/grid_benchmarks.cc b/test/grid_benchmarks.cc new file mode 100644 index 000000000..5d19fdc1e --- /dev/null +++ b/test/grid_benchmarks.cc @@ -0,0 +1,193 @@ +#include "grid_test_utils.h" + +#include +#include +#include +#include + +#include +#include 
+#include + +using namespace celerity; +using namespace celerity::detail; + +template +std::vector> create_random_boxes(const size_t grid_size, const size_t max_box_size, const size_t num_boxes, const uint32_t seed) { + std::minstd_rand rng(seed); + std::uniform_int_distribution offset_dist(0, grid_size - 1); + std::binomial_distribution range_dist(max_box_size - 1, 0.5); + std::vector> boxes; + while(boxes.size() < num_boxes) { + subrange sr; + bool inbounds = true; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = offset_dist(rng); + sr.range[d] = 1 + range_dist(rng); + inbounds &= sr.offset[d] + sr.range[d] <= grid_size; + } + if(inbounds) { boxes.emplace_back(sr); } + } + return boxes; +} + +TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 200}, + })); + + const auto input_2d = create_random_boxes<2>(grid_size, max_box_size, num_boxes, 42); + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(input_2d)); }; + + const auto input_3d = grid_detail::boxes_cast<3>(input_2d); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + + const auto normalized_2d = grid_detail::normalize(std::vector(input_2d)); + const auto normalized_3d = grid_detail::normalize(std::vector(input_3d)); + CHECK(normalized_3d == grid_detail::boxes_cast<3>(normalized_2d)); + + test_utils::render_boxes(input_2d, fmt::format("{}-input", label)); + test_utils::render_boxes(normalized_2d, fmt::format("{}-normalized", label)); +} + +TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 200}, + })); + + const auto input_3d = create_random_boxes<3>(grid_size, max_box_size, 
num_boxes, 42); + BENCHMARK(fmt::format("{} - native", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + test_utils::black_hole(grid_detail::normalize(std::vector(input_3d))); // to attach a profiler +} + +template +std::vector> create_box_tiling(const size_t n_per_side) { + const size_t length = 5; + size_t n_linear = 1; + for(int d = 0; d < Dims; ++d) { + n_linear *= n_per_side; + } + std::vector> boxes(n_linear); + for(size_t i = 0; i < n_linear; ++i) { + subrange sr; + auto dist_i = i; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = length * (dist_i % n_per_side); + sr.range[d] = length; + dist_i /= n_per_side; + } + boxes[i] = sr; + } + return boxes; +} + +TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchmark][grid]", ((int Dims), Dims), 1, 2, 3) { + const auto [label, n] = GENERATE(values>({ + {"small", 4}, + {"medium", 50}, + {"large", 1000}, + })); + + const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); + + const auto boxes_nd = create_box_tiling(n_per_side); + const auto normalized_nd = grid_detail::normalize(std::vector(boxes_nd)); + CHECK(normalized_nd.size() == 1); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_nd)); }; + + if constexpr(Dims < 3) { + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + } + + if constexpr(Dims == 2) { + test_utils::render_boxes(boxes_nd, fmt::format("{}-input", label)); + test_utils::render_boxes(normalized_nd, fmt::format("{}-normalized", label)); + } +} + +TEST_CASE("performing set operations between randomized regions - 2d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_2d{ + region(create_random_boxes<2>(grid_size, max_box_size, 
num_boxes, 13)), region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; + const std::vector inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; + + test_utils::render_boxes(inputs_2d[0].get_boxes(), fmt::format("{}-input-a", label)); + test_utils::render_boxes(inputs_2d[1].get_boxes(), fmt::format("{}-input-b", label)); + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + const auto union_2d = region_union(inputs_2d[0], inputs_2d[1]); + const auto union_3d = region_union(inputs_3d[0], inputs_3d[1]); + const auto intersection_2d = region_intersection(inputs_2d[0], inputs_2d[1]); + const auto intersection_3d = region_intersection(inputs_3d[0], inputs_3d[1]); + const auto difference_2d = region_difference(inputs_2d[0], inputs_2d[1]); + const auto difference_3d = region_difference(inputs_3d[0], inputs_3d[1]); + + CHECK(union_3d == region_cast<3>(union_2d)); + CHECK(intersection_3d == region_cast<3>(intersection_2d)); + CHECK(difference_3d == region_cast<3>(difference_2d)); + + test_utils::render_boxes(union_2d.get_boxes(), fmt::format("union-{}", label)); + test_utils::render_boxes(intersection_2d.get_boxes(), fmt::format("intersection-{}", label)); + test_utils::render_boxes(difference_2d.get_boxes(), fmt::format("difference-{}", label)); +} + +TEST_CASE("performing set 
operations between randomized regions - 3d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_3d{ + region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37))}; + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + // to attach a profiler + test_utils::black_hole(region_union(inputs_3d[0], inputs_3d[1])); + test_utils::black_hole(region_intersection(inputs_3d[0], inputs_3d[1])); + test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); +} + +std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { + std::vector> boxes; + for(size_t i = 0; i < num_boxes_per_side; ++i) { + boxes.emplace_back(id<2>(i, i), id<2>(i + 1, num_boxes_per_side)); + boxes.emplace_back(id<2>(i + 1, i), id<2>(num_boxes_per_side, i + 1)); + } + return boxes; +} + +TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[benchmark][grid]") { + const auto [label, n] = GENERATE(values>({ + {"small", 10}, + {"large", 200}, + })); + + const auto boxes_2d = create_interlocking_boxes(n); + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_2d)); }; + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + + test_utils::render_boxes(boxes_2d, fmt::format("{}-input", label)); +} diff --git a/test/grid_test_utils.cc b/test/grid_test_utils.cc new file mode 100644 
index 000000000..3d904307c --- /dev/null +++ b/test/grid_test_utils.cc @@ -0,0 +1,112 @@ +#include "grid_test_utils.h" + +#if CELERITY_DETAIL_HAVE_CAIRO +#include +#endif + +using namespace celerity; +using namespace celerity::detail; + +// forward declarations for functions not exposed in grid.h +namespace celerity::test_utils { + +// input: h as an angle in [0,360] and s,l in [0,1] - output: r,g,b in [0,1] +std::array hsl2rgb(const float h, const float s, const float l) { + constexpr auto hue2rgb = [](const float p, const float q, float t) { + if(t < 0) t += 1; + if(t > 1) t -= 1; + if(t < 1.f / 6) return p + (q - p) * 6 * t; + if(t < 1.f / 2) return q; + if(t < 2.f / 3) return p + (q - p) * (2.f / 3 - t) * 6; + return p; + }; + + if(s == 0) return {l, l, l}; // achromatic + + const auto q = l < 0.5 ? l * (1 + s) : l + s - l * s; + const auto p = 2 * l - q; + const auto r = hue2rgb(p, q, h + 1.f / 3); + const auto g = hue2rgb(p, q, h); + const auto b = hue2rgb(p, q, h - 1.f / 3); + return {r, g, b}; +} + +void render_boxes(const std::vector>& boxes, const std::string_view suffix) { +#if CELERITY_DETAIL_HAVE_CAIRO + const auto env = std::getenv("CELERITY_RENDER_REGIONS"); + if(env == nullptr || env[0] == 0) return; + + constexpr int ruler_width = 30; + constexpr int ruler_space = 4; + constexpr int text_margin = 2; + constexpr int border_start = ruler_width + ruler_space; + constexpr int cell_size = 20; + constexpr int border_end = 30; + constexpr int inset = 1; + + const auto bounds = bounding_box(boxes); + const auto canvas_width = border_start + static_cast(bounds.get_max()[1]) * cell_size + border_end; + const auto canvas_height = border_start + static_cast(bounds.get_max()[0]) * cell_size + border_end; + + cairo_surface_t* surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, canvas_width, canvas_height); + cairo_t* cr = cairo_create(surface); + + cairo_select_font_face(cr, "sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); + 
cairo_set_font_size(cr, 12); + + cairo_set_source_rgb(cr, 0, 0, 0); + cairo_set_line_width(cr, 1); + for(int i = 0; i < static_cast(bounds.get_max()[1]) + 1; ++i) { + const auto x = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, static_cast(x) - 0.5f, text_margin); + cairo_line_to(cr, static_cast(x) - 0.5f, ruler_width); + cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, x + text_margin, text_margin + te.height); + cairo_show_text(cr, label.c_str()); + } + for(int i = 0; i < static_cast(bounds.get_max()[0]) + 1; ++i) { + const auto y = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, text_margin, static_cast(y) - 0.5f); + cairo_line_to(cr, ruler_width, static_cast(y) - 0.5f); + cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, text_margin, y + te.height + text_margin); + cairo_show_text(cr, label.c_str()); + } + + cairo_set_operator(cr, CAIRO_OPERATOR_HSL_HUE); + for(size_t i = 0; i < boxes.size(); ++i) { + const auto hue = static_cast(i) / static_cast(boxes.size()); + const auto [r, g, b] = hsl2rgb(hue, 0.8f, 0.6f); + cairo_set_source_rgb(cr, r, g, b); + const auto sr = static_cast>(boxes[i]); + const auto x = border_start + 2 * inset + static_cast(sr.offset[1]) * cell_size; + const auto y = border_start + 2 * inset + static_cast(sr.offset[0]) * cell_size; + const auto w = static_cast(sr.range[1]) * cell_size - 2 * inset; + const auto h = static_cast(sr.range[0]) * cell_size - 2 * inset; + cairo_rectangle(cr, x, y, w, h); + cairo_fill(cr); + } + + cairo_set_source_rgb(cr, 1.0, 1.0, 1.0); + cairo_rectangle(cr, 0, 0, canvas_width, canvas_height); + cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER); + cairo_fill(cr); + + cairo_destroy(cr); + + const auto test_name = Catch::getResultCapture().getCurrentTestName(); + const auto 
image_name = fmt::format("{}-{}.png", std::regex_replace(test_name, std::regex("[^a-zA-Z0-9]+"), "-"), suffix); + cairo_surface_write_to_png(surface, image_name.c_str()); + cairo_surface_destroy(surface); +#else + (void)boxes; +#endif +} + +} diff --git a/test/grid_test_utils.h b/test/grid_test_utils.h new file mode 100644 index 000000000..01413089a --- /dev/null +++ b/test/grid_test_utils.h @@ -0,0 +1,22 @@ +#include "grid.h" +#include "test_utils.h" + +namespace celerity::test_utils { + +struct partition_vector_order { + template + bool operator()(const std::vector>& lhs, const std::vector>& rhs) { + if(lhs.size() < rhs.size()) return true; + if(lhs.size() > rhs.size()) return false; + constexpr detail::box_coordinate_order box_order; + for(size_t i = 0; i < lhs.size(); ++i) { + if(box_order(lhs[i], rhs[i])) return true; + if(box_order(rhs[i], lhs[i])) return false; + } + return false; + } +}; + +void render_boxes(const std::vector>& boxes, const std::string_view suffix = "region"); + +} diff --git a/test/grid_tests.cc b/test/grid_tests.cc index f1bb150fd..43d89f5e5 100644 --- a/test/grid_tests.cc +++ b/test/grid_tests.cc @@ -1,5 +1,4 @@ -#include "grid.h" -#include "test_utils.h" +#include "grid_test_utils.h" #include #include @@ -7,135 +6,12 @@ #include #include -#include #include #include -#if CELERITY_DETAIL_HAVE_CAIRO -#include -#endif using namespace celerity; using namespace celerity::detail; -// forward declarations for functions not exposed in grid.h -namespace celerity::detail::grid_detail { - -} // namespace celerity::detail::grid_detail - -struct partition_vector_order { - template - bool operator()(const std::vector>& lhs, const std::vector>& rhs) { - if(lhs.size() < rhs.size()) return true; - if(lhs.size() > rhs.size()) return false; - constexpr box_coordinate_order box_order; - for(size_t i = 0; i < lhs.size(); ++i) { - if(box_order(lhs[i], rhs[i])) return true; - if(box_order(rhs[i], lhs[i])) return false; - } - return false; - } -}; - -// input: 
h as an angle in [0,360] and s,l in [0,1] - output: r,g,b in [0,1] -std::array hsl2rgb(const float h, const float s, const float l) { - constexpr auto hue2rgb = [](const float p, const float q, float t) { - if(t < 0) t += 1; - if(t > 1) t -= 1; - if(t < 1.f / 6) return p + (q - p) * 6 * t; - if(t < 1.f / 2) return q; - if(t < 2.f / 3) return p + (q - p) * (2.f / 3 - t) * 6; - return p; - }; - - if(s == 0) return {l, l, l}; // achromatic - - const auto q = l < 0.5 ? l * (1 + s) : l + s - l * s; - const auto p = 2 * l - q; - const auto r = hue2rgb(p, q, h + 1.f / 3); - const auto g = hue2rgb(p, q, h); - const auto b = hue2rgb(p, q, h - 1.f / 3); - return {r, g, b}; -} - -void render_boxes(const std::vector>& boxes, const std::string_view suffix = "region") { -#if CELERITY_DETAIL_HAVE_CAIRO - const auto env = std::getenv("CELERITY_RENDER_REGIONS"); - if(env == nullptr || env[0] == 0) return; - - constexpr int ruler_width = 30; - constexpr int ruler_space = 4; - constexpr int text_margin = 2; - constexpr int border_start = ruler_width + ruler_space; - constexpr int cell_size = 20; - constexpr int border_end = 30; - constexpr int inset = 1; - - const auto bounds = bounding_box(boxes); - const auto canvas_width = border_start + static_cast(bounds.get_max()[1]) * cell_size + border_end; - const auto canvas_height = border_start + static_cast(bounds.get_max()[0]) * cell_size + border_end; - - cairo_surface_t* surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, canvas_width, canvas_height); - cairo_t* cr = cairo_create(surface); - - cairo_select_font_face(cr, "sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); - cairo_set_font_size(cr, 12); - - cairo_set_source_rgb(cr, 0, 0, 0); - cairo_set_line_width(cr, 1); - for(int i = 0; i < static_cast(bounds.get_max()[1]) + 1; ++i) { - const auto x = border_start + 2 * inset + i * cell_size; - cairo_move_to(cr, static_cast(x) - 0.5f, text_margin); - cairo_line_to(cr, static_cast(x) - 0.5f, ruler_width); - 
cairo_stroke(cr); - const auto label = fmt::format("{}", i); - cairo_text_extents_t te; - cairo_text_extents(cr, label.c_str(), &te); - cairo_move_to(cr, x + text_margin, text_margin + te.height); - cairo_show_text(cr, label.c_str()); - } - for(int i = 0; i < static_cast(bounds.get_max()[0]) + 1; ++i) { - const auto y = border_start + 2 * inset + i * cell_size; - cairo_move_to(cr, text_margin, static_cast(y) - 0.5f); - cairo_line_to(cr, ruler_width, static_cast(y) - 0.5f); - cairo_stroke(cr); - const auto label = fmt::format("{}", i); - cairo_text_extents_t te; - cairo_text_extents(cr, label.c_str(), &te); - cairo_move_to(cr, text_margin, y + te.height + text_margin); - cairo_show_text(cr, label.c_str()); - } - - cairo_set_operator(cr, CAIRO_OPERATOR_HSL_HUE); - for(size_t i = 0; i < boxes.size(); ++i) { - const auto hue = static_cast(i) / static_cast(boxes.size()); - const auto [r, g, b] = hsl2rgb(hue, 0.8f, 0.6f); - cairo_set_source_rgb(cr, r, g, b); - const auto sr = static_cast>(boxes[i]); - const auto x = border_start + 2 * inset + static_cast(sr.offset[1]) * cell_size; - const auto y = border_start + 2 * inset + static_cast(sr.offset[0]) * cell_size; - const auto w = static_cast(sr.range[1]) * cell_size - 2 * inset; - const auto h = static_cast(sr.range[0]) * cell_size - 2 * inset; - cairo_rectangle(cr, x, y, w, h); - cairo_fill(cr); - } - - cairo_set_source_rgb(cr, 1.0, 1.0, 1.0); - cairo_rectangle(cr, 0, 0, canvas_width, canvas_height); - cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER); - cairo_fill(cr); - - cairo_destroy(cr); - - const auto test_name = Catch::getResultCapture().getCurrentTestName(); - const auto image_name = fmt::format("{}-{}.png", std::regex_replace(test_name, std::regex("[^a-zA-Z0-9]+"), "-"), suffix); - cairo_surface_write_to_png(surface, image_name.c_str()); - cairo_surface_destroy(surface); -#else - (void)boxes; -#endif -} - - TEST_CASE("split_box dissects boxes as expected - 3d", "[grid]") { const box<3> input_box{{0, 0, 0}, {7, 9, 
5}}; const std::vector> cuts{ @@ -228,9 +104,9 @@ TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { }; test_directional_merge<1>(unmerged, merged_dim1); - render_boxes(unmerged, "unmerged"); - render_boxes(merged_dim0, "merged-dim0"); - render_boxes(merged_dim1, "merged-dim1"); + test_utils::render_boxes(unmerged, "unmerged"); + test_utils::render_boxes(merged_dim0, "merged-dim0"); + test_utils::render_boxes(merged_dim1, "merged-dim1"); } TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { @@ -265,9 +141,9 @@ TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { }; test_directional_merge<1>(unmerged, merged_dim1); - render_boxes(unmerged, "unmerged"); - render_boxes(merged_dim0, "merged-dim0"); - render_boxes(merged_dim1, "merged-dim1"); + test_utils::render_boxes(unmerged, "unmerged"); + test_utils::render_boxes(merged_dim0, "merged-dim0"); + test_utils::render_boxes(merged_dim1, "merged-dim1"); } TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { @@ -323,9 +199,9 @@ TEST_CASE("region normalization removes overlaps - 2d", "[grid]") { std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); CHECK(result == normalized); - render_boxes(overlapping, "input"); - render_boxes(result, "result"); - render_boxes(normalized, "normalized"); + test_utils::render_boxes(overlapping, "input"); + test_utils::render_boxes(result, "result"); + test_utils::render_boxes(normalized, "normalized"); } TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[grid]") { @@ -346,112 +222,11 @@ TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[gri std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); CHECK(result == normalized); - render_boxes(input, "input"); - render_boxes(result, "result"); - render_boxes(normalized, "normalized"); -} - -template -std::vector> create_random_boxes(const size_t grid_size, const size_t max_box_size, 
const size_t num_boxes, const uint32_t seed) { - std::minstd_rand rng(seed); - std::uniform_int_distribution offset_dist(0, grid_size - 1); - std::binomial_distribution range_dist(max_box_size - 1, 0.5); - std::vector> boxes; - while(boxes.size() < num_boxes) { - subrange sr; - bool inbounds = true; - for(int d = 0; d < Dims; ++d) { - sr.offset[d] = offset_dist(rng); - sr.range[d] = 1 + range_dist(rng); - inbounds &= sr.offset[d] + sr.range[d] <= grid_size; - } - if(inbounds) { boxes.emplace_back(sr); } - } - return boxes; -} - -TEST_CASE("normalizing randomized box sets - 2d", "[grid]") { - const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - {"medium", 50, 1, 50}, - {"large", 200, 20, 200}, - })); - - const auto input_2d = create_random_boxes<2>(grid_size, max_box_size, num_boxes, 42); - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(input_2d)); }; - - const auto input_3d = grid_detail::boxes_cast<3>(input_2d); - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(input_3d)); }; - - const auto normalized_2d = grid_detail::normalize(std::vector(input_2d)); - const auto normalized_3d = grid_detail::normalize(std::vector(input_3d)); - CHECK(normalized_3d == grid_detail::boxes_cast<3>(normalized_2d)); - - render_boxes(input_2d, fmt::format("{}-input", label)); - render_boxes(normalized_2d, fmt::format("{}-normalized", label)); + test_utils::render_boxes(input, "input"); + test_utils::render_boxes(result, "result"); + test_utils::render_boxes(normalized, "normalized"); } -TEST_CASE("normalizing randomized box sets - 3d", "[grid]") { - const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - {"medium", 50, 1, 50}, - {"large", 200, 20, 200}, - })); - - const auto input_3d = create_random_boxes<3>(grid_size, max_box_size, num_boxes, 42); - BENCHMARK(fmt::format("{} - native", label)) { return 
grid_detail::normalize(std::vector(input_3d)); }; - test_utils::black_hole(grid_detail::normalize(std::vector(input_3d))); // to attach a profiler -} - -template -std::vector> create_box_tiling(const size_t n_per_side) { - const size_t length = 5; - size_t n_linear = 1; - for(int d = 0; d < Dims; ++d) { - n_linear *= n_per_side; - } - std::vector> boxes(n_linear); - for(size_t i = 0; i < n_linear; ++i) { - subrange sr; - auto dist_i = i; - for(int d = 0; d < Dims; ++d) { - sr.offset[d] = length * (dist_i % n_per_side); - sr.range[d] = length; - dist_i /= n_per_side; - } - boxes[i] = sr; - } - return boxes; -} - -TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[grid]", ((int Dims), Dims), 1, 2, 3) { - const auto [label, n] = GENERATE(values>({ - {"small", 4}, - {"medium", 50}, - {"large", 1000}, - })); - - const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); - - const auto boxes_nd = create_box_tiling(n_per_side); - const auto normalized_nd = grid_detail::normalize(std::vector(boxes_nd)); - CHECK(normalized_nd.size() == 1); - - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_nd)); }; - - if constexpr(Dims < 3) { - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; - } - - if constexpr(Dims == 2) { - render_boxes(boxes_nd, fmt::format("{}-input", label)); - render_boxes(normalized_nd, fmt::format("{}-normalized", label)); - } -} - -// TODO: benchmark small box sets - we want low constant overhead for the common case - TEST_CASE("region union - 2d", "[grid]") { const region<2> ra{{ {{0, 0}, {3, 3}}, @@ -482,10 +257,10 @@ TEST_CASE("region union - 2d", "[grid]") { const auto result = region_union(ra, rb); CHECK(result.get_boxes() == expected); - render_boxes(ra.get_boxes(), "ra"); - render_boxes(rb.get_boxes(), "rb"); - render_boxes(expected, "expected"); - 
render_boxes(result.get_boxes(), "result"); + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); } TEST_CASE("region intersection - 2d", "[grid]") { @@ -514,10 +289,10 @@ TEST_CASE("region intersection - 2d", "[grid]") { const auto result = region_intersection(ra, rb); CHECK(result.get_boxes() == expected); - render_boxes(ra.get_boxes(), "ra"); - render_boxes(rb.get_boxes(), "rb"); - render_boxes(expected, "expected"); - render_boxes(result.get_boxes(), "result"); + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); } TEST_CASE("region difference - 2d", "[grid]") { @@ -549,10 +324,10 @@ TEST_CASE("region difference - 2d", "[grid]") { const auto result = region_difference(ra, rb); CHECK(result.get_boxes() == expected); - render_boxes(ra.get_boxes(), "ra"); - render_boxes(rb.get_boxes(), "rb"); - render_boxes(expected, "expected"); - render_boxes(result.get_boxes(), "result"); + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); } TEST_CASE("region normalization - 0d", "[grid]") { @@ -597,84 +372,3 @@ TEST_CASE("region difference - 0d", "[grid]") { CHECK(!region_difference(unit, empty).empty()); CHECK(region_difference(unit, unit).empty()); } - -TEST_CASE("performing set operations between randomized regions - 2d", "[grid]") { - const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - {"medium", 50, 1, 50}, - {"large", 200, 20, 100}, - })); - - const std::vector inputs_2d{ - region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 13)), 
region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; - const std::vector inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; - - render_boxes(inputs_2d[0].get_boxes(), fmt::format("{}-input-a", label)); - render_boxes(inputs_2d[1].get_boxes(), fmt::format("{}-input-b", label)); - - BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_2d[0], inputs_2d[1]); }; - BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_2d[0], inputs_2d[1]); }; - BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_2d[0], inputs_2d[1]); }; - BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; - - const auto union_2d = region_union(inputs_2d[0], inputs_2d[1]); - const auto union_3d = region_union(inputs_3d[0], inputs_3d[1]); - const auto intersection_2d = region_intersection(inputs_2d[0], inputs_2d[1]); - const auto intersection_3d = region_intersection(inputs_3d[0], inputs_3d[1]); - const auto difference_2d = region_difference(inputs_2d[0], inputs_2d[1]); - const auto difference_3d = region_difference(inputs_3d[0], inputs_3d[1]); - - CHECK(union_3d == region_cast<3>(union_2d)); - CHECK(intersection_3d == region_cast<3>(intersection_2d)); - CHECK(difference_3d == region_cast<3>(difference_2d)); - - render_boxes(union_2d.get_boxes(), fmt::format("union-{}", label)); - render_boxes(intersection_2d.get_boxes(), fmt::format("intersection-{}", label)); - render_boxes(difference_2d.get_boxes(), fmt::format("difference-{}", label)); -} - -TEST_CASE("performing set operations between randomized regions - 3d", "[grid]") { - const auto [label, 
grid_size, max_box_size, num_boxes] = GENERATE(values>({ - {"small", 10, 5, 4}, - {"medium", 50, 1, 50}, - {"large", 200, 20, 100}, - })); - - const std::vector inputs_3d{ - region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37))}; - - BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; - BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; - - // to attach a profiler - test_utils::black_hole(region_union(inputs_3d[0], inputs_3d[1])); - test_utils::black_hole(region_intersection(inputs_3d[0], inputs_3d[1])); - test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); -} - -std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { - std::vector> boxes; - for(size_t i = 0; i < num_boxes_per_side; ++i) { - boxes.emplace_back(id<2>(i, i), id<2>(i + 1, num_boxes_per_side)); - boxes.emplace_back(id<2>(i + 1, i), id<2>(num_boxes_per_side, i + 1)); - } - return boxes; -} - -TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[grid]") { - const auto [label, n] = GENERATE(values>({ - {"small", 10}, - {"large", 200}, - })); - - const auto boxes_2d = create_interlocking_boxes(n); - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); - - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_2d)); }; - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; - - render_boxes(boxes_2d, fmt::format("{}-input", label)); -} From d49a6d5554196d19a5c96f94f422d08017855835 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Fri, 25 Aug 2023 16:10:50 +0200 Subject: [PATCH 08/17] Use small_vector for storage of region boxes 
--- include/grid.h | 42 +++++++++++--------- src/buffer_manager.cc | 6 +-- src/distributed_graph_generator.cc | 4 +- src/grid.cc | 50 ++++++++++++------------ src/task.cc | 2 +- src/task_manager.cc | 2 +- test/grid_benchmarks.cc | 42 ++++++++++---------- test/grid_test_utils.cc | 2 +- test/grid_test_utils.h | 4 +- test/grid_tests.cc | 61 +++++++++++++++--------------- test/test_utils.h | 6 +++ 11 files changed, 117 insertions(+), 104 deletions(-) diff --git a/include/grid.h b/include/grid.h index 8abb185c9..d666b0237 100644 --- a/include/grid.h +++ b/include/grid.h @@ -5,6 +5,8 @@ #include #include +#include + #include "ranges.h" #include "workaround.h" @@ -201,23 +203,35 @@ struct box_coordinate_order { } }; +template +using box_vector = gch::small_vector>; + +template +box_vector boxes_cast(const box_vector& in) { + assert(grid_detail::get_min_dimensions(in.begin(), in.end()) <= DimsOut); + box_vector out(in.size(), box()); + std::transform(in.begin(), in.end(), out.begin(), box_cast); + return out; +} + /// An arbitrary-dimensional set of points described by a normalized tiling of boxes. template class region { public: constexpr static int dimensions = Dims; using box = detail::box; + using box_vector = detail::box_vector; region() = default; region(const box& single_box); region(const subrange& single_sr); /// Constructs a region by normalizing an arbitrary, potentially-overlapping tiling of boxes. - explicit region(std::vector&& boxes); + explicit region(box_vector&& boxes); - const std::vector& get_boxes() const& { return m_boxes; } + const box_vector& get_boxes() const& { return m_boxes; } - std::vector into_boxes() && { return std::move(m_boxes); } + box_vector into_boxes() && { return std::move(m_boxes); } bool empty() const { return m_boxes.empty(); } @@ -236,26 +250,18 @@ class region { template friend region grid_detail::make_region(P&&... 
args); - std::vector m_boxes; + box_vector m_boxes; - region(grid_detail::normalized_t, std::vector&& boxes); + region(grid_detail::normalized_t, box_vector&& boxes); }; } // namespace celerity::detail namespace celerity::detail::grid_detail { -template -std::vector> boxes_cast(const std::vector>& in) { - assert(get_min_dimensions(in.begin(), in.end()) <= DimsOut); - std::vector> out(in.size()); - std::transform(in.begin(), in.end(), out.begin(), [](const box& box) { return box_cast(box); }); - return out; -} - // forward-declaration for tests (explicitly instantiated) template -void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); +void dissect_box(const box& in_box, const std::vector>& cuts, box_vector& out_dissected, int dim); // forward-declaration for tests (explicitly instantiated) template @@ -263,11 +269,11 @@ BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterato // forward-declaration for tests (explicitly instantiated) template -void normalize(std::vector>& boxes); +void normalize(box_vector& boxes); // rvalue shortcut for normalize(lvalue) template -std::vector>&& normalize(std::vector>&& boxes) { +box_vector&& normalize(box_vector&& boxes) { normalize(boxes); return std::move(boxes); } @@ -280,7 +286,7 @@ template region region_cast(const region& in) { assert(in.get_min_dimensions() <= DimsOut); // a normalized region will remain normalized after the cast - return grid_detail::make_region(grid_detail::normalized, grid_detail::boxes_cast(in.get_boxes())); + return grid_detail::make_region(grid_detail::normalized, boxes_cast(in.get_boxes())); } template @@ -303,7 +309,7 @@ region region_union(const box& lhs, const region& rhs) { template region region_union(const box& lhs, const box& rhs) { - return region(std::vector{lhs, rhs}); + return region(box_vector{lhs, rhs}); } template diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index e61b1c183..3e32630c0 100644 --- 
a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -243,7 +243,7 @@ namespace detail { // If a previous buffer is provided, we may have to retain some or all of the existing data. const region<3> retain_region = ([&]() { - std::vector> boxes{coherent_box}; + box_vector<3> boxes{coherent_box}; if(previous_buffer.is_allocated()) { boxes.push_back(subrange(previous_buffer.offset, previous_buffer.storage->get_range())); } return region(std::move(boxes)); })(); // IIFE @@ -265,7 +265,7 @@ namespace detail { if(detail::access::mode_traits::is_consumer(mode)) #endif { - std::vector> updated_region_boxes; + box_vector<3> updated_region_boxes; std::vector remaining_transfers; auto& scheduled_buffer_transfers = m_scheduled_transfers[bid]; remaining_transfers.reserve(scheduled_buffer_transfers.size() / 2); @@ -332,7 +332,7 @@ namespace detail { } }; - std::vector> replicated_boxes; + box_vector<3> replicated_boxes; auto& buffer_data_locations = m_newest_data_location.at(bid); const auto data_locations = buffer_data_locations.get_region_values(remaining_region_after_transfers); for(auto& dl : data_locations) { diff --git a/src/distributed_graph_generator.cc b/src/distributed_graph_generator.cc index c753a8aca..72eb6e93b 100644 --- a/src/distributed_graph_generator.cc +++ b/src/distributed_graph_generator.cc @@ -293,7 +293,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(is_local_chunk && !is_pending_reduction) { const auto local_sources = buffer_state.local_last_writer.get_region_values(req); - std::vector> missing_part_boxes; + box_vector<3> missing_part_boxes; for(const auto& [box, wcs] : local_sources) { if(box.empty()) continue; if(!wcs.is_fresh()) { @@ -479,7 +479,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // Determine which local data is fresh/stale based on task-level writes. 
auto requirements = get_buffer_requirements_for_mapped_access(tsk, subrange<3>(tsk.get_global_offset(), tsk.get_global_size()), tsk.get_global_size()); for(auto& [bid, reqs_by_mode] : requirements) { - std::vector> global_write_boxes; + box_vector<3> global_write_boxes; for(const auto mode : access::producer_modes) { if(reqs_by_mode.count(mode) == 0) continue; const auto& by_mode = reqs_by_mode.at(mode); diff --git a/src/grid.cc b/src/grid.cc index 2a9fa645e..9eae9c568 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -209,12 +209,12 @@ BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterato } // explicit instantiations for tests (might otherwise be inlined) -template std::vector>::iterator merge_connected_boxes_along_dim<0, 1>(std::vector>::iterator first, std::vector>::iterator last); -template std::vector>::iterator merge_connected_boxes_along_dim<0, 2>(std::vector>::iterator first, std::vector>::iterator last); -template std::vector>::iterator merge_connected_boxes_along_dim<1, 2>(std::vector>::iterator first, std::vector>::iterator last); -template std::vector>::iterator merge_connected_boxes_along_dim<0, 3>(std::vector>::iterator first, std::vector>::iterator last); -template std::vector>::iterator merge_connected_boxes_along_dim<1, 3>(std::vector>::iterator first, std::vector>::iterator last); -template std::vector>::iterator merge_connected_boxes_along_dim<2, 3>(std::vector>::iterator first, std::vector>::iterator last); +template box_vector<1>::iterator merge_connected_boxes_along_dim<0, 1>(box_vector<1>::iterator first, box_vector<1>::iterator last); +template box_vector<2>::iterator merge_connected_boxes_along_dim<0, 2>(box_vector<2>::iterator first, box_vector<2>::iterator last); +template box_vector<2>::iterator merge_connected_boxes_along_dim<1, 2>(box_vector<2>::iterator first, box_vector<2>::iterator last); +template box_vector<3>::iterator merge_connected_boxes_along_dim<0, 3>(box_vector<3>::iterator first, 
box_vector<3>::iterator last); +template box_vector<3>::iterator merge_connected_boxes_along_dim<1, 3>(box_vector<3>::iterator first, box_vector<3>::iterator last); +template box_vector<3>::iterator merge_connected_boxes_along_dim<2, 3>(box_vector<3>::iterator first, box_vector<3>::iterator last); // For higher-dimensional regions, the order in which dimensions are merged is relevant for the shape of the resulting box set. We merge along the last // ("fastest") dimension first to make sure the resulting boxes cover the largest possible extent of contiguous memory when are applied to buffers. @@ -238,7 +238,7 @@ BidirectionalIterator merge_connected_boxes(const BidirectionalIterator first, B // Split a box into parts according to dissection lines in `cuts`, where `cuts` is indexed by component dimension. This function is not generic // over EffectiveDims, rather, `cuts` will have 1 <= n <= StorageDims entries to indicate along how many dimensions the box should be dissected. template -void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim) { +void dissect_box(const box& in_box, const std::vector>& cuts, box_vector& out_dissected, int dim) { assert(dim < static_cast(cuts.size())); const auto& dim_cuts = cuts[static_cast(dim)]; @@ -276,13 +276,13 @@ void dissect_box(const box& in_box, const std::vector& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); -template void dissect_box(const box<3>& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); +template void dissect_box(const box<2>& in_box, const std::vector>& cuts, box_vector<2>& out_dissected, int dim); +template void dissect_box(const box<3>& in_box, const std::vector>& cuts, box_vector<3>& out_dissected, int dim); // Apply dissect_box to all boxes in a range, with a shortcut if no cuts are to be done. 
template void dissect_boxes(const InputIterator first, const InputIterator last, const std::vector>& cuts, - std::vector::value_type>& out_dissected) { + box_vector::value_type::dimensions>& out_dissected) { if(!cuts.empty()) { for(auto it = first; it != last; ++it) { dissect_box(*it, cuts, out_dissected, 0); @@ -309,13 +309,13 @@ std::vector collect_dissection_lines(const InputIterator first, const In } template -void normalize_impl(std::vector>& boxes) { +void normalize_impl(box_vector& boxes) { static_assert(EffectiveDims <= StorageDims); assert(!boxes.empty()); if constexpr(EffectiveDims == 0) { // all 0d boxes are identical - boxes.resize(1); + boxes.resize(1, box()); } else if constexpr(EffectiveDims == 1) { // merge_connected_boxes will sort and merge - this is already the complete 1d normalization boxes.erase(merge_connected_boxes(boxes.begin(), boxes.end()), boxes.end()); @@ -333,7 +333,7 @@ void normalize_impl(std::vector>& boxes) { cuts[static_cast(d)] = collect_dissection_lines(boxes.begin(), boxes.end(), d); } - std::vector> disjoint_boxes; + box_vector disjoint_boxes; dissect_boxes(boxes.begin(), boxes.end(), cuts, disjoint_boxes); boxes = std::move(disjoint_boxes); @@ -375,7 +375,7 @@ decltype(auto) dispatch_effective_dims(int effective_dims, F&& f) { // There is exactly one sequence of boxes for any set of points that fulfills 1-4, meaning that an "==" comparison of normalized tilings would be equivalent // to an equality comparision of the covered point sets. 
template -void normalize(std::vector>& boxes) { +void normalize(box_vector& boxes) { boxes.erase(std::remove_if(boxes.begin(), boxes.end(), std::mem_fn(&box::empty)), boxes.end()); if(boxes.size() <= 1) return; @@ -388,10 +388,10 @@ void normalize(std::vector>& boxes) { } // explicit instantiations for tests (might otherwise be inlined into region::region) -template void normalize(std::vector>& boxes); -template void normalize(std::vector>& boxes); -template void normalize(std::vector>& boxes); -template void normalize(std::vector>& boxes); +template void normalize(box_vector<0>& boxes); +template void normalize(box_vector<1>& boxes); +template void normalize(box_vector<2>& boxes); +template void normalize(box_vector<3>& boxes); template region region_intersection_impl(const region& lhs, const region& rhs) { @@ -399,7 +399,7 @@ region region_intersection_impl(const region& lhs, con // O(N * M). This can probably be improved for large inputs by dissecting either lhs or rhs by the lines of the other and then performing an interval // search similar to how remove_pairwise_covered operates. - std::vector> intersection; + box_vector intersection; for(const auto& left : lhs.get_boxes()) { for(const auto& right : rhs.get_boxes()) { if(const auto box = grid_detail::box_intersection(left, right); !box.empty()) { intersection.push_back(box); } @@ -424,7 +424,7 @@ region region_intersection_impl(const region& lhs, con // Complete the region_difference operation with an already dissected left-hand side and knowledge of effective dimensionality. 
template -void apply_region_difference(std::vector>& dissected_left, const region& rhs) { +void apply_region_difference(box_vector& dissected_left, const region& rhs) { static_assert(EffectiveDims <= StorageDims); // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs @@ -450,16 +450,16 @@ void apply_region_difference(std::vector>& dissected_left, cons namespace celerity::detail { template -region::region(const box& single_box) : region(std::vector{single_box}) {} // still need to normalize in case single_box is empty +region::region(const box& single_box) : region(box_vector{single_box}) {} // still need to normalize in case single_box is empty template region::region(const subrange& single_sr) : region(box(single_sr)) {} template -region::region(std::vector&& boxes) : region(grid_detail::normalized, (/* in-place */ grid_detail::normalize(boxes), /* then */ std::move(boxes))) {} +region::region(box_vector&& boxes) : region(grid_detail::normalized, (/* in-place */ grid_detail::normalize(boxes), /* then */ std::move(boxes))) {} template -region::region(grid_detail::normalized_t /* tag */, std::vector&& boxes) : m_boxes(std::move(boxes)) {} +region::region(grid_detail::normalized_t /* tag */, box_vector&& boxes) : m_boxes(std::move(boxes)) {} template class region<0>; template class region<1>; @@ -472,7 +472,7 @@ region region_union(const region& lhs, const region& rhs) { if(lhs.empty()) return rhs; if(rhs.empty()) return lhs; - std::vector> box_union; + box_vector box_union; box_union.reserve(lhs.get_boxes().size() + rhs.get_boxes().size()); box_union.insert(box_union.end(), lhs.get_boxes().begin(), lhs.get_boxes().end()); box_union.insert(box_union.end(), rhs.get_boxes().begin(), rhs.get_boxes().end()); @@ -518,7 +518,7 @@ region region_difference(const region& lhs, const region& rhs) } // 2. 
dissect lhs according to the lines of rhs, so that any overlap between lhs and rhs is turned into an lhs box fully covered by an rhs box - std::vector> dissected_left; + box_vector dissected_left; grid_detail::dissect_boxes(lhs.get_boxes().begin(), lhs.get_boxes().end(), cuts, dissected_left); grid_detail::dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // diff --git a/src/task.cc b/src/task.cc index c8cf40480..264fda1ba 100644 --- a/src/task.cc +++ b/src/task.cc @@ -35,7 +35,7 @@ namespace detail { region<3> buffer_access_map::get_mode_requirements( const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { - std::vector> boxes; + box_vector<3> boxes; for(size_t i = 0; i < m_accesses.size(); ++i) { if(m_accesses[i].first != bid || m_accesses[i].second->get_access_mode() != mode) continue; boxes.push_back(get_requirements_for_nth_access(i, kernel_dims, sr, global_size)); diff --git a/src/task_manager.cc b/src/task_manager.cc index 456a74f9e..77450ec7f 100644 --- a/src/task_manager.cc +++ b/src/task_manager.cc @@ -56,7 +56,7 @@ namespace detail { region<3> get_requirements(const task& tsk, buffer_id bid, const std::vector& modes) { const auto& access_map = tsk.get_buffer_access_map(); const subrange<3> full_range{tsk.get_global_offset(), tsk.get_global_size()}; - std::vector> boxes; + box_vector<3> boxes; for(auto m : modes) { const auto req = access_map.get_mode_requirements(bid, m, tsk.get_dimensions(), full_range, tsk.get_global_size()); boxes.insert(boxes.end(), req.get_boxes().begin(), req.get_boxes().end()); diff --git a/test/grid_benchmarks.cc b/test/grid_benchmarks.cc index 5d19fdc1e..4b84e63a7 100644 --- a/test/grid_benchmarks.cc +++ b/test/grid_benchmarks.cc @@ -13,11 +13,11 @@ using namespace celerity; using namespace celerity::detail; template -std::vector> create_random_boxes(const size_t grid_size, const size_t max_box_size, const size_t num_boxes, const 
uint32_t seed) { +box_vector create_random_boxes(const size_t grid_size, const size_t max_box_size, const size_t num_boxes, const uint32_t seed) { std::minstd_rand rng(seed); std::uniform_int_distribution offset_dist(0, grid_size - 1); std::binomial_distribution range_dist(max_box_size - 1, 0.5); - std::vector> boxes; + box_vector boxes; while(boxes.size() < num_boxes) { subrange sr; bool inbounds = true; @@ -39,14 +39,14 @@ TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][grid]") { })); const auto input_2d = create_random_boxes<2>(grid_size, max_box_size, num_boxes, 42); - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(input_2d)); }; + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(test_utils::copy(input_2d)); }; - const auto input_3d = grid_detail::boxes_cast<3>(input_2d); - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + const auto input_3d = boxes_cast<3>(input_2d); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(test_utils::copy(input_3d)); }; - const auto normalized_2d = grid_detail::normalize(std::vector(input_2d)); - const auto normalized_3d = grid_detail::normalize(std::vector(input_3d)); - CHECK(normalized_3d == grid_detail::boxes_cast<3>(normalized_2d)); + const auto normalized_2d = grid_detail::normalize(test_utils::copy(input_2d)); + const auto normalized_3d = grid_detail::normalize(test_utils::copy(input_3d)); + CHECK(normalized_3d == boxes_cast<3>(normalized_2d)); test_utils::render_boxes(input_2d, fmt::format("{}-input", label)); test_utils::render_boxes(normalized_2d, fmt::format("{}-normalized", label)); @@ -60,18 +60,18 @@ TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][grid]") { })); const auto input_3d = create_random_boxes<3>(grid_size, max_box_size, num_boxes, 42); - BENCHMARK(fmt::format("{} - native", label)) { return 
grid_detail::normalize(std::vector(input_3d)); }; - test_utils::black_hole(grid_detail::normalize(std::vector(input_3d))); // to attach a profiler + BENCHMARK(fmt::format("{} - native", label)) { return grid_detail::normalize(test_utils::copy(input_3d)); }; + test_utils::black_hole(grid_detail::normalize(test_utils::copy(input_3d))); // to attach a profiler } template -std::vector> create_box_tiling(const size_t n_per_side) { +box_vector create_box_tiling(const size_t n_per_side) { const size_t length = 5; size_t n_linear = 1; for(int d = 0; d < Dims; ++d) { n_linear *= n_per_side; } - std::vector> boxes(n_linear); + box_vector boxes(n_linear, box()); for(size_t i = 0; i < n_linear; ++i) { subrange sr; auto dist_i = i; @@ -95,14 +95,14 @@ TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchm const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); const auto boxes_nd = create_box_tiling(n_per_side); - const auto normalized_nd = grid_detail::normalize(std::vector(boxes_nd)); + const auto normalized_nd = grid_detail::normalize(test_utils::copy(boxes_nd)); CHECK(normalized_nd.size() == 1); - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_nd)); }; + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(test_utils::copy(boxes_nd)); }; if constexpr(Dims < 3) { - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + const auto boxes_3d = boxes_cast<3>(boxes_nd); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(test_utils::copy(boxes_3d)); }; } if constexpr(Dims == 2) { @@ -168,8 +168,8 @@ TEST_CASE("performing set operations between randomized regions - 3d", "[benchma test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); } -std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { - 
std::vector> boxes; +box_vector<2> create_interlocking_boxes(const size_t num_boxes_per_side) { + box_vector<2> boxes; for(size_t i = 0; i < num_boxes_per_side; ++i) { boxes.emplace_back(id<2>(i, i), id<2>(i + 1, num_boxes_per_side)); boxes.emplace_back(id<2>(i + 1, i), id<2>(num_boxes_per_side, i + 1)); @@ -184,10 +184,10 @@ TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[bench })); const auto boxes_2d = create_interlocking_boxes(n); - const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); + const auto boxes_3d = boxes_cast<3>(boxes_2d); - BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_2d)); }; - BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(test_utils::copy(boxes_2d)); }; + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(test_utils::copy(boxes_3d)); }; test_utils::render_boxes(boxes_2d, fmt::format("{}-input", label)); } diff --git a/test/grid_test_utils.cc b/test/grid_test_utils.cc index 3d904307c..89e79f411 100644 --- a/test/grid_test_utils.cc +++ b/test/grid_test_utils.cc @@ -31,7 +31,7 @@ std::array hsl2rgb(const float h, const float s, const float l) { return {r, g, b}; } -void render_boxes(const std::vector>& boxes, const std::string_view suffix) { +void render_boxes(const box_vector<2>& boxes, const std::string_view suffix) { #if CELERITY_DETAIL_HAVE_CAIRO const auto env = std::getenv("CELERITY_RENDER_REGIONS"); if(env == nullptr || env[0] == 0) return; diff --git a/test/grid_test_utils.h b/test/grid_test_utils.h index 01413089a..b1c1e68e9 100644 --- a/test/grid_test_utils.h +++ b/test/grid_test_utils.h @@ -5,7 +5,7 @@ namespace celerity::test_utils { struct partition_vector_order { template - bool operator()(const std::vector>& lhs, const std::vector>& rhs) { + bool operator()(const detail::box_vector& 
lhs, const detail::box_vector& rhs) { if(lhs.size() < rhs.size()) return true; if(lhs.size() > rhs.size()) return false; constexpr detail::box_coordinate_order box_order; @@ -17,6 +17,6 @@ struct partition_vector_order { } }; -void render_boxes(const std::vector>& boxes, const std::string_view suffix = "region"); +void render_boxes(const detail::box_vector<2>& boxes, const std::string_view suffix = "region"); } diff --git a/test/grid_tests.cc b/test/grid_tests.cc index 43d89f5e5..91c55bfe0 100644 --- a/test/grid_tests.cc +++ b/test/grid_tests.cc @@ -1,3 +1,4 @@ +#include "grid.h" #include "grid_test_utils.h" #include @@ -18,14 +19,14 @@ TEST_CASE("split_box dissects boxes as expected - 3d", "[grid]") { {0, 4, 8, 12}, {8, 9}, }; - std::vector> expected{ + box_vector<3> expected{ {{0, 0, 0}, {4, 8, 5}}, {{0, 8, 0}, {4, 9, 5}}, {{4, 0, 0}, {7, 8, 5}}, {{4, 8, 0}, {7, 9, 5}}, }; - std::vector> split; + box_vector<3> split; grid_detail::dissect_box(input_box, cuts, split, 0); std::sort(split.begin(), split.end(), box_coordinate_order()); @@ -34,7 +35,7 @@ TEST_CASE("split_box dissects boxes as expected - 3d", "[grid]") { } template -void test_directional_merge(std::vector> unmerged, std::vector> merged) { +void test_directional_merge(box_vector unmerged, box_vector merged) { CAPTURE(MergeDim); std::minstd_rand rng(42); std::shuffle(unmerged.begin(), unmerged.end(), rng); @@ -47,13 +48,13 @@ void test_directional_merge(std::vector> unmerged, std::vector> unmerged{ + const box_vector<1> unmerged{ {{0}, {2}}, {{2}, {4}}, {{4}, {8}}, {{10}, {12}}, }; - const std::vector> merged{ + const box_vector<1> merged{ {{0}, {8}}, {{10}, {12}}, }; @@ -61,14 +62,14 @@ TEST_CASE("directional merge of non-overlapping boxes - 1d", "[grid]") { } TEST_CASE("directional merge of overlapping boxes - 1d", "[grid]") { - const std::vector> unmerged{ + const box_vector<1> unmerged{ {{0}, {6}}, {{2}, {4}}, {{8}, {12}}, {{10}, {16}}, {{16}, {18}}, }; - const std::vector> merged{ + const 
box_vector<1> merged{ {{0}, {6}}, {{8}, {18}}, }; @@ -76,7 +77,7 @@ TEST_CASE("directional merge of overlapping boxes - 1d", "[grid]") { } TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { - const std::vector> unmerged{ + const box_vector<2> unmerged{ {{0, 0}, {2, 2}}, {{0, 2}, {2, 4}}, {{0, 4}, {2, 6}}, @@ -88,7 +89,7 @@ TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { {{4, 8}, {6, 10}}, }; - const std::vector> merged_dim0{ + const box_vector<2> merged_dim0{ {{0, 0}, {2, 2}}, {{0, 2}, {4, 4}}, {{0, 4}, {6, 6}}, @@ -97,7 +98,7 @@ TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { }; test_directional_merge<0>(unmerged, merged_dim0); - const std::vector> merged_dim1{ + const box_vector<2> merged_dim1{ {{0, 0}, {2, 6}}, {{2, 2}, {4, 8}}, {{4, 4}, {6, 10}}, @@ -110,7 +111,7 @@ TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { } TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { - const std::vector> unmerged{ + const box_vector<2> unmerged{ {{0, 0}, {12, 3}}, {{0, 1}, {12, 4}}, {{0, 4}, {12, 6}}, @@ -121,7 +122,7 @@ TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { {{8, 0}, {10, 12}}, }; - const std::vector> merged_dim0{ + const box_vector<2> merged_dim0{ {{0, 0}, {12, 3}}, {{0, 1}, {12, 4}}, {{0, 4}, {12, 6}}, @@ -131,7 +132,7 @@ TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { }; test_directional_merge<0>(unmerged, merged_dim0); - const std::vector> merged_dim1{ + const box_vector<2> merged_dim1{ {{0, 0}, {12, 6}}, {{0, 8}, {12, 10}}, {{0, 0}, {3, 12}}, @@ -147,7 +148,7 @@ TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { } TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { - const std::vector> unmerged{ + const box_vector<3> unmerged{ {{0, 0, 2}, {2, 2, 4}}, {{0, 2, 0}, {2, 4, 2}}, {{0, 2, 2}, {2, 4, 4}}, @@ -157,7 +158,7 @@ TEST_CASE("directional merge of non-overlapping 3d boxes", 
"[grid]") { {{2, 2, 2}, {4, 4, 4}}, }; - const std::vector> merged_dim0{ + const box_vector<3> merged_dim0{ {{0, 0, 2}, {4, 2, 4}}, {{0, 2, 0}, {4, 4, 2}}, {{0, 2, 2}, {4, 4, 4}}, @@ -165,7 +166,7 @@ TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { }; test_directional_merge<0>(unmerged, merged_dim0); - const std::vector> merged_dim1{ + const box_vector<3> merged_dim1{ {{0, 2, 0}, {2, 4, 2}}, {{0, 0, 2}, {2, 4, 4}}, {{2, 0, 0}, {4, 4, 2}}, @@ -173,7 +174,7 @@ TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { }; test_directional_merge<1>(unmerged, merged_dim1); - const std::vector> merged_dim2{ + const box_vector<3> merged_dim2{ {{0, 0, 2}, {2, 2, 4}}, {{0, 2, 0}, {2, 4, 4}}, {{2, 0, 0}, {4, 2, 4}}, @@ -183,19 +184,19 @@ TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { } TEST_CASE("region normalization removes overlaps - 2d", "[grid]") { - const std::vector> overlapping{ + const box_vector<2> overlapping{ {{0, 0}, {4, 4}}, {{2, 2}, {6, 6}}, {{4, 8}, {5, 9}}, }; - std::vector> normalized{ + box_vector<2> normalized{ {{0, 0}, {2, 4}}, {{2, 0}, {4, 6}}, {{4, 2}, {6, 6}}, {{4, 8}, {5, 9}}, }; - const auto result = grid_detail::normalize(std::vector(overlapping)); + const auto result = grid_detail::normalize(test_utils::copy(overlapping)); std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); CHECK(result == normalized); @@ -205,20 +206,20 @@ TEST_CASE("region normalization removes overlaps - 2d", "[grid]") { } TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[grid]") { - const std::vector> input{ + const box_vector<2> input{ {{0, 0}, {8, 2}}, {{0, 2}, {2, 4}}, {{6, 2}, {8, 4}}, {{0, 4}, {8, 6}}, }; - std::vector> normalized{ + box_vector<2> normalized{ {{0, 0}, {2, 6}}, {{2, 0}, {6, 2}}, {{2, 4}, {6, 6}}, {{6, 0}, {8, 6}}, }; - const auto result = grid_detail::normalize(std::vector(input)); + const auto result = grid_detail::normalize(test_utils::copy(input)); 
std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); CHECK(result == normalized); @@ -241,7 +242,7 @@ TEST_CASE("region union - 2d", "[grid]") { {{4, 7}, {6, 9}}, }}; - std::vector> expected{ + box_vector<2> expected{ {{0, 0}, {2, 3}}, {{2, 0}, {3, 6}}, {{3, 3}, {4, 6}}, @@ -278,7 +279,7 @@ TEST_CASE("region intersection - 2d", "[grid]") { {{2, 9}, {3, 14}}, }}; - std::vector> expected{ + box_vector<2> expected{ {{3, 4}, {6, 6}}, {{7, 2}, {9, 4}}, {{2, 9}, {3, 11}}, @@ -309,7 +310,7 @@ TEST_CASE("region difference - 2d", "[grid]") { {{7, 1}, {11, 5}}, }}; - std::vector> expected{ + box_vector<2> expected{ {{0, 0}, {1, 6}}, {{1, 0}, {3, 1}}, {{3, 0}, {4, 2}}, @@ -331,13 +332,13 @@ TEST_CASE("region difference - 2d", "[grid]") { } TEST_CASE("region normalization - 0d", "[grid]") { - std::vector> r; + box_vector<0> r; auto n = r; - CHECK(grid_detail::normalize(std::vector(r)).empty()); + CHECK(grid_detail::normalize(test_utils::copy(r)).empty()); r.emplace_back(); - CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); + CHECK(grid_detail::normalize(test_utils::copy(r)) == box_vector<0>{{box<0>()}}); r.emplace_back(); - CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); + CHECK(grid_detail::normalize(test_utils::copy(r)) == box_vector<0>{{box<0>()}}); } TEST_CASE("region union - 0d", "[grid]") { diff --git a/test/test_utils.h b/test/test_utils.h index fa8f5df6f..c2a321061 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -361,6 +361,12 @@ namespace test_utils { ~task_test_context() { maybe_print_task_graph(trec); } }; + // explicitly invoke a copy constructor without repeating the type + template + T copy(const T& v) { + return v; + } + template void black_hole(T&& v) { Catch::Benchmark::keep_memory(&v); From e8343618a5be2340ab0ce6a25165324848cfaa73 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 9 Sep 2023 11:17:09 +0200 Subject: [PATCH 09/17] Re-format code after grid re-implementation (CI 
formatting check seems broken) --- include/ranges.h | 2 +- src/buffer_transfer_manager.cc | 2 +- src/task.cc | 3 +-- src/worker_job.cc | 3 +-- test/accessor_tests.cc | 8 ++++---- test/grid_test_utils.cc | 2 +- test/grid_test_utils.h | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/ranges.h b/include/ranges.h index 6e42c7c99..82f0129b9 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -489,7 +489,7 @@ namespace detail { #undef CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN template - bool all_true(const coordinate &bools) { + bool all_true(const coordinate& bools) { for(int d = 0; d < Dims; ++d) { CELERITY_DETAIL_ASSERT_ON_HOST(bools[d] == 0 || bools[d] == 1); if(bools[d] == 0) return false; diff --git a/src/buffer_transfer_manager.cc b/src/buffer_transfer_manager.cc index c0f7fddc7..fe15559de 100644 --- a/src/buffer_transfer_manager.cc +++ b/src/buffer_transfer_manager.cc @@ -64,7 +64,7 @@ namespace detail { assert(pkg.get_command_type() == command_type::await_push); const auto& data = std::get(pkg.data); - const auto &expected_region = data.region; + const auto& expected_region = data.region; std::shared_ptr t_handle; // Check to see if we have (fully) received the data already diff --git a/src/task.cc b/src/task.cc index 264fda1ba..f5fbaeb98 100644 --- a/src/task.cc +++ b/src/task.cc @@ -43,8 +43,7 @@ namespace detail { return region(std::move(boxes)); } - box<3> buffer_access_map::get_requirements_for_nth_access( - const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { + box<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { const auto& [_, rm] = m_accesses[n]; chunk<3> chnk{sr.offset, sr.range, global_size}; diff --git a/src/worker_job.cc b/src/worker_job.cc index 046932b63..7ab74fb9f 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -261,8 +261,7 @@ namespace detail { if(oob_max != 
id<3>{1, 1, 1}) { const auto& access_map = tsk->get_buffer_access_map(); - const auto acc_sr = - access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); + const auto acc_sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are " "outside of mapped subrange {}", diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index 15eec65fd..1c4ed1df8 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -171,8 +171,8 @@ namespace detail { // this kernel initializes the buffer what will be read after. auto acc_write = accessor_fixture::template get_device_accessor(bid, range, {}); - test_utils::run_parallel_for>(accessor_fixture::get_device_queue().get_sycl_queue(), - range, {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); + test_utils::run_parallel_for>( + accessor_fixture::get_device_queue().get_sycl_queue(), range, {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); SECTION("for device buffers") { auto acc_read = accessor_fixture::template get_device_accessor(bid, range, {}); @@ -207,8 +207,8 @@ namespace detail { } typename accessor_fixture::access_target tgt = accessor_fixture::access_target::host; - bool acc_check = accessor_fixture::template buffer_reduce>(bid, tgt, range, - {}, true, [range = range](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); + bool acc_check = accessor_fixture::template buffer_reduce>( + bid, tgt, range, {}, true, [range = range](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); REQUIRE(acc_check); } diff --git a/test/grid_test_utils.cc 
b/test/grid_test_utils.cc index 89e79f411..f9aac2924 100644 --- a/test/grid_test_utils.cc +++ b/test/grid_test_utils.cc @@ -109,4 +109,4 @@ void render_boxes(const box_vector<2>& boxes, const std::string_view suffix) { #endif } -} +} // namespace celerity::test_utils diff --git a/test/grid_test_utils.h b/test/grid_test_utils.h index b1c1e68e9..990508a61 100644 --- a/test/grid_test_utils.h +++ b/test/grid_test_utils.h @@ -19,4 +19,4 @@ struct partition_vector_order { void render_boxes(const detail::box_vector<2>& boxes, const std::string_view suffix = "region"); -} +} // namespace celerity::test_utils From f47d1923de7c3d241ca4f49135e1d5c75482c1cf Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 9 Sep 2023 11:31:33 +0200 Subject: [PATCH 10/17] Simplify one usage of regions in buffer_manager to boxes --- src/buffer_manager.cc | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index 3e32630c0..1c7381393 100644 --- a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -285,17 +285,15 @@ namespace detail { // separate commands. This might however change in the future. if(t_minus_coherent_region != t_box) { assert(detail::access::mode_traits::is_consumer(mode)); - auto intersection = region(box_intersection(t_box, coherent_box)); // TODO this can be a box instead of a region! + auto intersection = box_intersection(t_box, coherent_box); remaining_region_after_transfers = region_difference(remaining_region_after_transfers, intersection); const auto element_size = m_buffer_infos.at(bid).element_size; - for(const auto& box : intersection.get_boxes()) { - auto sr = box.get_subrange(); - // TODO can this temp buffer be avoided? 
- auto tmp = make_uninitialized_payload(sr.range.size() * element_size); - linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range}); - target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer()); - updated_region_boxes.push_back(box); - } + auto sr = intersection.get_subrange(); + // TODO can this temp buffer be avoided? + auto tmp = make_uninitialized_payload(sr.range.size() * element_size); + linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range}); + target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer()); + updated_region_boxes.push_back(intersection); } // Transfer only applies partially, or not at all - which means we have to keep it around. remaining_transfers.emplace_back(std::move(t)); From 354a4b76a834487a2f7f519c9f00bed0448cc20b Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 9 Sep 2023 11:32:07 +0200 Subject: [PATCH 11/17] Address reviewer comments on grid --- src/grid.cc | 2 +- test/grid_test_utils.cc | 1 - test/grid_test_utils.h | 14 -------------- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/grid.cc b/src/grid.cc index 9eae9c568..d0b8a0d15 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -369,7 +369,7 @@ decltype(auto) dispatch_effective_dims(int effective_dims, F&& f) { // For any set of boxes, find the unique box tiling that covers the same points and is subject to the following constraints: // 1. the extent of every box is maximized along the last dimension, then along the second-to-last dimension, and so forth. -// 2. no two boxes within the tiling intersect (i.e. cover a common point). +// 2. no two boxes within the tiling intersect (i.e. cover a common point). // 3. the tiling contains no empty boxes. // 4. the normalized sequence is sorted according to box_coordinate_order.
// There is exactly one sequence of boxes for any set of points that fulfills 1-4, meaning that an "==" comparison of normalized tilings would be equivalent diff --git a/test/grid_test_utils.cc b/test/grid_test_utils.cc index f9aac2924..69eeb4404 100644 --- a/test/grid_test_utils.cc +++ b/test/grid_test_utils.cc @@ -7,7 +7,6 @@ using namespace celerity; using namespace celerity::detail; -// forward declarations for functions not exposed in grid.h namespace celerity::test_utils { // input: h as an angle in [0,360] and s,l in [0,1] - output: r,g,b in [0,1] diff --git a/test/grid_test_utils.h b/test/grid_test_utils.h index 990508a61..57c18df04 100644 --- a/test/grid_test_utils.h +++ b/test/grid_test_utils.h @@ -3,20 +3,6 @@ namespace celerity::test_utils { -struct partition_vector_order { - template - bool operator()(const detail::box_vector& lhs, const detail::box_vector& rhs) { - if(lhs.size() < rhs.size()) return true; - if(lhs.size() > rhs.size()) return false; - constexpr detail::box_coordinate_order box_order; - for(size_t i = 0; i < lhs.size(); ++i) { - if(box_order(lhs[i], rhs[i])) return true; - if(box_order(rhs[i], lhs[i])) return false; - } - return false; - } -}; - void render_boxes(const detail::box_vector<2>& boxes, const std::string_view suffix = "region"); } // namespace celerity::test_utils From 96559409b857bc7c954564964381b026d6621a45 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Sat, 9 Sep 2023 12:12:03 +0200 Subject: [PATCH 12/17] Rename grid::get_min_dimensions() to get_effective_dims(), move to detail:: for public types --- include/grid.h | 14 +++--- include/ranges.h | 108 +++++++++++++++++++++++-------------------- include/region_map.h | 2 +- src/grid.cc | 8 ++-- 4 files changed, 71 insertions(+), 61 deletions(-) diff --git a/include/grid.h b/include/grid.h index d666b0237..d84ac3b52 100644 --- a/include/grid.h +++ b/include/grid.h @@ -39,8 +39,8 @@ region make_region(Params&&... 
args) { } template -int get_min_dimensions(const InputIterator first, const InputIterator last) { - return std::accumulate(first, last, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_min_dimensions()); }); +int get_effective_dims(const InputIterator first, const InputIterator last) { + return std::accumulate(first, last, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_effective_dims()); }); } } // namespace celerity::detail::grid_detail @@ -101,7 +101,7 @@ class box /* class instead of struct: enforces min <= max invariant */ { size_t get_area() const { return get_range().size(); } /// Returns the smallest dimensionality that `*this` can be `box_cast` to. - int get_min_dimensions() const { + int get_effective_dims() const { if(empty()) return 1; // edge case: a 0-dimensional box is always non-empty for(int dims = Dims; dims > 0; --dims) { if(m_max[dims - 1] > 1) { return dims; } @@ -143,7 +143,7 @@ class box /* class instead of struct: enforces min <= max invariant */ { /// Boxes can be cast between dimensionalities as long as no information is lost (i.e. a cast to a higher dimensionality is always round-trip safe). template box box_cast(const box& in) { - CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= DimsOut); + CELERITY_DETAIL_ASSERT_ON_HOST(in.get_effective_dims() <= DimsOut); return box(subrange_cast(in.get_subrange())); // cast through subrange to fill missing range dimensions with 1s } @@ -208,7 +208,7 @@ using box_vector = gch::small_vector>; template box_vector boxes_cast(const box_vector& in) { - assert(grid_detail::get_min_dimensions(in.begin(), in.end()) <= DimsOut); + assert(grid_detail::get_effective_dims(in.begin(), in.end()) <= DimsOut); box_vector out(in.size(), box()); std::transform(in.begin(), in.end(), out.begin(), box_cast); return out; @@ -241,7 +241,7 @@ class region { } /// Returns the smallest dimensionality that `*this` can be `region_cast` to. 
- int get_min_dimensions() const { return grid_detail::get_min_dimensions(m_boxes.begin(), m_boxes.end()); } + int get_effective_dims() const { return grid_detail::get_effective_dims(m_boxes.begin(), m_boxes.end()); } friend bool operator==(const region& lhs, const region& rhs) { return lhs.m_boxes == rhs.m_boxes; } friend bool operator!=(const region& lhs, const region& rhs) { return !(lhs == rhs); } @@ -284,7 +284,7 @@ namespace celerity::detail { template region region_cast(const region& in) { - assert(in.get_min_dimensions() <= DimsOut); + assert(in.get_effective_dims() <= DimsOut); // a normalized region will remain normalized after the cast return grid_detail::make_region(grid_detail::normalized, boxes_cast(in.get_boxes())); } diff --git a/include/ranges.h b/include/ranges.h index 82f0129b9..110676933 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -216,21 +216,11 @@ class coordinate { CELERITY_DETAIL_NO_UNIQUE_ADDRESS coordinate_storage m_values; }; -template -InterfaceOut coordinate_cast(const InterfaceIn& in) { - CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= InterfaceOut::dimensions); - return InterfaceOut(make_from, in); -} - template -range range_cast(const InterfaceIn& in) { - return coordinate_cast>(in); -} +range range_cast(const InterfaceIn& in); template -id id_cast(const InterfaceIn& in) { - return coordinate_cast>(in); -} +id id_cast(const InterfaceIn& in); struct zeros_t { } inline static constexpr zeros; @@ -287,19 +277,11 @@ class range : public detail::coordinate, Dims> { } } - /// Returns the smallest dimensionality that `*this` can be `range_cast` to. 
- int get_min_dimensions() const { - for(int dims = Dims; dims > 0; --dims) { - if((*this)[dims - 1] > 1) return dims; - } - return 0; - } - private: friend class detail::coordinate, Dims>; - template - friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); + template + friend range detail::range_cast(const InterfaceIn& in); template > constexpr range() noexcept {} @@ -353,17 +335,9 @@ class id : public detail::coordinate, Dims> { } } - /// Returns the smallest dimensionality that `*this` can be `id_cast` to. - int get_min_dimensions() const { - for(int dims = Dims; dims > 0; --dims) { - if((*this)[dims - 1] > 0) { return dims; } - } - return 0; - } - private: - template - friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); + template + friend id detail::id_cast(const InterfaceIn& in); template constexpr id(const detail::make_from_t /* tag */, const detail::coordinate& in) @@ -512,9 +486,6 @@ struct chunk { chunk(const id& offset, const celerity::range& range, const celerity::range& global_size) : offset(offset), range(range), global_size(global_size) {} - /// Returns the smallest dimensionality that `*this` can be `chunk_cast` to. - int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions(), global_size.get_min_dimensions()}); } - friend bool operator==(const chunk& lhs, const chunk& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range && lhs.global_size == rhs.global_size; } @@ -532,27 +503,66 @@ struct subrange { subrange(const id& offset, const celerity::range& range) : offset(offset), range(range) {} subrange(const chunk& other) : offset(other.offset), range(other.range) {} - /// Returns the smallest dimensionality that `*this` can be `subrange_cast` to. 
- int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions()}); } - friend bool operator==(const subrange& lhs, const subrange& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range; } friend bool operator!=(const subrange& lhs, const subrange& rhs) { return !operator==(lhs, rhs); } }; -namespace detail { +} // namespace celerity - template - chunk chunk_cast(const chunk& other) { - CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); - return chunk{detail::id_cast(other.offset), detail::range_cast(other.range), detail::range_cast(other.global_size)}; +namespace celerity::detail { + +/// Returns the smallest dimensionality that the range can be `range_cast` to. +template +int get_effective_dims(const range& range) { + for(int dims = Dims; dims > 0; --dims) { + if(range[dims - 1] > 1) return dims; } + return 0; +} - template - subrange subrange_cast(const subrange& other) { - CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); - return subrange{detail::id_cast(other.offset), detail::range_cast(other.range)}; +template +range range_cast(const InterfaceIn& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(get_effective_dims(in) <= DimsOut); + return range(make_from, in); +} + +/// Returns the smallest dimensionality that the id can be `id_cast` to. +template +int get_effective_dims(const id& id) { + for(int dims = Dims; dims > 0; --dims) { + if(id[dims - 1] > 0) { return dims; } } + return 0; +} -} // namespace detail +template +id id_cast(const InterfaceIn& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(get_effective_dims(in) <= DimsOut); + return id(make_from, in); +} -} // namespace celerity +/// Returns the smallest dimensionality that the chunk can be `chunk_cast` to. 
+template +int get_effective_dims(const chunk& ck) { + return std::max({get_effective_dims(ck.offset), get_effective_dims(ck.range), get_effective_dims(ck.global_size)}); +} + +/// Returns the smallest dimensionality that the subrange can be `subrange_cast` to. +template +int get_effective_dims(const subrange& sr) { + return std::max(get_effective_dims(sr.offset), get_effective_dims(sr.range)); +} + +template +chunk chunk_cast(const chunk& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(get_effective_dims(other) <= Dims); + return chunk{detail::id_cast(other.offset), detail::range_cast(other.range), detail::range_cast(other.global_size)}; +} + +template +subrange subrange_cast(const subrange& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(get_effective_dims(other) <= Dims); + return subrange{detail::id_cast(other.offset), detail::range_cast(other.range)}; +} + +} // namespace celerity::detail diff --git a/include/region_map.h b/include/region_map.h index 2ee9facaa..aaa417d7a 100644 --- a/include/region_map.h +++ b/include/region_map.h @@ -812,7 +812,7 @@ namespace region_map_detail { inline void assert_dimensionality(const box<3>& box, const int dims) { #if !defined(NDEBUG) - assert(box.get_min_dimensions() <= dims); + assert(box.get_effective_dims() <= dims); #endif } diff --git a/src/grid.cc b/src/grid.cc index d0b8a0d15..b61562a29 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -4,7 +4,7 @@ namespace celerity::detail::grid_detail { // Regions have a storage dimensionality (the `Dims` template parameter of `class region`) and an effective dimensionality that is smaller iff all contained // boxes are effectively the result of casting e.g. box<2> to box<3>, or the described region "accidentally" is a lower-dimensional slice of the full space. 
-// This property is detected at runtime through {box,region}::get_min_dimensions(), and all region-algorithm implementations are generic over both StorageDims +// This property is detected at runtime through {box,region}::get_effective_dims(), and all region-algorithm implementations are generic over both StorageDims // and EffectiveDims to optimize for the embedding of arbitrary-dimensional regions into region<3> as it commonly happens in the runtime. // 2-connectivity for 1d boxes, 4-connectivity for 2d boxes and 6-connectivity for 3d boxes. @@ -379,7 +379,7 @@ void normalize(box_vector& boxes) { boxes.erase(std::remove_if(boxes.begin(), boxes.end(), std::mem_fn(&box::empty)), boxes.end()); if(boxes.size() <= 1) return; - const auto effective_dims = get_min_dimensions(boxes.begin(), boxes.end()); + const auto effective_dims = get_effective_dims(boxes.begin(), boxes.end()); assert(effective_dims <= Dims); dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // @@ -489,7 +489,7 @@ region region_intersection(const region& lhs, const region& rh // shortcut-evaluate trivial cases if(lhs.empty() || rhs.empty()) return {}; - const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + const auto effective_dims = std::max(lhs.get_effective_dims(), rhs.get_effective_dims()); return grid_detail::dispatch_effective_dims(effective_dims, [&](const auto effective_dims) { // return grid_detail::region_intersection_impl(lhs, rhs); }); @@ -508,7 +508,7 @@ region region_difference(const region& lhs, const region& rhs) // the resulting effective_dims can never be greater than the lhs dimension, but the difference operator must still operate on all available dimensions // to correctly identify overlapping boxes - const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + const auto effective_dims = std::max(lhs.get_effective_dims(), rhs.get_effective_dims()); assert(effective_dims <= Dims); // 1. 
collect dissection lines (in *all* dimensions) from rhs From 0ca4458794c0da487e954f128b2e99b8a782a99d Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 12 Sep 2023 14:18:56 +0200 Subject: [PATCH 13/17] Get rid of unnecessary "remove overlap" step in region normalization --- src/grid.cc | 116 +++------------------------------------------ test/grid_tests.cc | 22 +++++++++ 2 files changed, 29 insertions(+), 109 deletions(-) diff --git a/src/grid.cc b/src/grid.cc index b61562a29..8193319da 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -7,27 +7,6 @@ namespace celerity::detail::grid_detail { // This property is detected at runtime through {box,region}::get_effective_dims(), and all region-algorithm implementations are generic over both StorageDims // and EffectiveDims to optimize for the embedding of arbitrary-dimensional regions into region<3> as it commonly happens in the runtime. -// 2-connectivity for 1d boxes, 4-connectivity for 2d boxes and 6-connectivity for 3d boxes. -template -bool boxes_connected(const box& box1, const box& box2) { - static_assert(EffectiveDims <= StorageDims); - - if(box1.empty() || box2.empty()) return false; - - bool touching = false; - for(int d = 0; d < EffectiveDims; ++d) { - const auto min = std::max(box1.get_min()[d], box2.get_min()[d]); - const auto max = std::min(box1.get_max()[d], box2.get_max()[d]); - if(min[d] > max[d]) return false; // fully disconnected, even across corners - if(min[d] == max[d]) { - // when boxes are touching (but not intersecting) in more than one dimension, they can only be connected via corners - if(touching) return false; - touching = true; - } - } - return true; -} - // Like detail::box_intersection, but aware of effective dimensionality template box box_intersection(const box& box1, const box& box2) { @@ -62,81 +41,6 @@ bool box_covers(const box& top, const box& bottom) { return true; } -// O(N^2) remove any box A != B for which box_covers(B, A) is true -template -BidirectionalIterator 
remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last) { - for(auto top = first; top != last; ++top) { - top_replaced: - for(auto bottom = std::next(top); bottom != last;) { - if(box_covers(*top, *bottom)) { - *bottom = *--last; - } else if(box_covers(*bottom, *top)) { - *top = *bottom; - *bottom = *--last; - goto top_replaced; // NOLINT(cppcoreguidelines-avoid-goto) - } else { - ++bottom; - } - } - } - return last; -} - -// Partition a range of boxes into intervals described by a grid of dissection lines, and invoke a user function on each partition. -template -void for_each_dissection_interval(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts, Fn&& f, int dim = 0) { - using box_type = typename std::iterator_traits::value_type; - - assert(first != last); - - if(cuts.size() <= static_cast(dim)) { - // We are past the last dissected dimension, so the interval is just our entire input range - f(first, last); - return; - } - - // Since boxes can never cross a dissection line, we can partition the range into dissection intervals by sorting along one dimension - std::sort(first, last, [dim](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[dim] < rhs.get_min()[dim]; }); - - auto next_cut = cuts[dim].begin(); - while(first != last) { - // The current box `first` always belongs to our interval. 
Now find, in O(log N), the dissection line that marks the end of this interval - next_cut = std::upper_bound(next_cut, cuts[dim].end(), first->get_min()[dim]); - assert(next_cut != cuts[dim].end()); - - // Find, in O(log N), the end iterator of our interval by searching the first item that is "right" of the dissection line - const auto next = std::lower_bound(first, last, *next_cut, [dim](const box_type& lhs, const size_t cut) { return lhs.get_min()[dim] < cut; }); - - // Recurse into the found interval along the next (faster) dimension - for_each_dissection_interval(first, next, cuts, f, dim + 1); - - first = next; - } -} - -// Like remove_pairwise_covered(first, last), but at lower average complexity for a range of boxes that are dissected according to `cuts`. -template -BidirectionalIterator remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts) { - using box_type [[maybe_unused]] = typename std::iterator_traits::value_type; - - assert(cuts.size() <= EffectiveDims); - assert(std::all_of(cuts.begin(), cuts.end(), [](const std::vector& dim_cuts) { return std::is_sorted(dim_cuts.begin(), dim_cuts.end()); })); - - if(first == last || std::next(first) == last) return last; - - // We compact the range in-place after each removal by left-shifting each de-duplicated range - auto last_out = first; - - for_each_dissection_interval(first, last, cuts, [&](const BidirectionalIterator i_first, const BidirectionalIterator i_last) { - // Delegate the interval to the O(N^2) overload of remove_pairwise_covered - const auto last_retained = remove_pairwise_covered(i_first, i_last); - // for_each_dissection_interval will not touch [first, i_last) after this iteration - last_out = std::move(i_first, last_retained, last_out); - }); - - return last_out; -} - // In a range of boxes that are identical in all dimensions except MergeDim, merge all connected boxes ("unconditional directional merge") template BidirectionalIterator 
merge_connected_intervals(BidirectionalIterator first, BidirectionalIterator last) { @@ -327,26 +231,20 @@ void normalize_impl(box_vector& boxes) { assert(!boxes.empty()); if(boxes.size() == 1) return; - // 1. dissect boxes along the edges of all other boxes (except the last, "fastest" dim) to create the "maximally mergeable set" of small boxes for step + // 1. dissect boxes along the edges of all other boxes (except the last, "fastest" dim) to create the "maximally mergeable set" of boxes for step 2 std::vector> cuts(EffectiveDims - 1); for(int d = 0; d < EffectiveDims - 1; ++d) { cuts[static_cast(d)] = collect_dissection_lines(boxes.begin(), boxes.end(), d); } - box_vector disjoint_boxes; - dissect_boxes(boxes.begin(), boxes.end(), cuts, disjoint_boxes); - boxes = std::move(disjoint_boxes); + box_vector dissected_boxes; + dissect_boxes(boxes.begin(), boxes.end(), cuts, dissected_boxes); + boxes = std::move(dissected_boxes); - // 2. remove all overlap by removing pairwise coverings - const auto first = boxes.begin(); - auto last = boxes.end(); - last = remove_pairwise_covered(first, last, cuts); - - // 3. merge the overlap-free tiling of boxes where possible - last = merge_connected_boxes(first, last); - boxes.erase(last, boxes.end()); + // 2. the dissected tiling of boxes only potentially overlaps in the fastest dimension - merge where possible + boxes.erase(merge_connected_boxes(boxes.begin(), boxes.end()), boxes.end()); - // 4. normalize box order + // 3. 
normalize box order std::sort(boxes.begin(), boxes.end(), box_coordinate_order()); } } diff --git a/test/grid_tests.cc b/test/grid_tests.cc index 91c55bfe0..c77cc6358 100644 --- a/test/grid_tests.cc +++ b/test/grid_tests.cc @@ -1,5 +1,6 @@ #include "grid.h" #include "grid_test_utils.h" +#include "test_utils.h" #include #include @@ -228,6 +229,27 @@ TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[gri test_utils::render_boxes(normalized, "normalized"); } +// we used to have a separate step to remove overlaps between dissected boxes - unnecessarily, as splitting + merging already takes care of that! +TEMPLATE_TEST_CASE_SIG("region normalization removes fully covered boxes", "[grid]", ((int Dims), Dims), 1, 2, 3) { + const box_vector input{ + {test_utils::truncate_id({1, 1, 1}), test_utils::truncate_id({9, 9, 9})}, + {test_utils::truncate_id({1, 1, 1}), test_utils::truncate_id({7, 7, 7})}, + {test_utils::truncate_id({3, 3, 3}), test_utils::truncate_id({7, 7, 7})}, + }; + const box_vector normalized{ + {test_utils::truncate_id({1, 1, 1}), test_utils::truncate_id({9, 9, 9})}, + }; + + const auto result = grid_detail::normalize(test_utils::copy(input)); + CHECK(result == normalized); + + if constexpr(Dims == 2) { + test_utils::render_boxes(input, "input"); + test_utils::render_boxes(result, "result"); + test_utils::render_boxes(normalized, "normalized"); + } +} + TEST_CASE("region union - 2d", "[grid]") { const region<2> ra{{ {{0, 0}, {3, 3}}, From 2e67e442a8c0c6536f40cfe49fcbb21f97299fb0 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Wed, 13 Sep 2023 12:30:19 +0200 Subject: [PATCH 14/17] Add comments on potential future region algorithm optimizations --- src/grid.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/grid.cc b/src/grid.cc index 8193319da..42cc4155b 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -295,8 +295,11 @@ template region region_intersection_impl(const region& lhs, const region& rhs) { 
static_assert(EffectiveDims <= StorageDims); - // O(N * M). This can probably be improved for large inputs by dissecting either lhs or rhs by the lines of the other and then performing an interval - // search similar to how remove_pairwise_covered operates. + // O(N * M) naively collect intersections of box pairs. + // There might be a way to optimize this further through sorting one side and finding potentially intersecting boxes through lower_bound + upper_bound. + // I have previously attempted to implement this entirely without box_intersection by dissecting both sides by the union of their dissection lines, + // sorting both by box_coordinate_order and finding common boxes through std::set_intersection. Practically this turned out to be slower, sometimes + // by several orders of magnitude, as the number of dissected boxes can grow to O((N * M) ^ EffectiveDims). box_vector intersection; for(const auto& left : lhs.get_boxes()) { for(const auto& right : rhs.get_boxes()) { @@ -325,7 +328,8 @@ template void apply_region_difference(box_vector& dissected_left, const region& rhs) { static_assert(EffectiveDims <= StorageDims); - // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs + // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs. + // For further optimization potential see the comments on region_intersection_impl. 
const auto first_left = dissected_left.begin(); auto last_left = dissected_left.end(); for(const auto& right : rhs.get_boxes()) { From 17feadbfcc7c9ade0652b938ecc2edbb57886297 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Wed, 13 Sep 2023 16:43:39 +0200 Subject: [PATCH 15/17] fmt format subranges as "[offset] + [range]" instead of "[min] - [max]" --- include/print_utils.h | 8 ++++++-- test/print_graph_tests.cc | 30 +++++++++++++++--------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/print_utils.h b/include/print_utils.h index 7e4c6cf9b..97d6ce653 100644 --- a/include/print_utils.h +++ b/include/print_utils.h @@ -51,9 +51,13 @@ struct fmt::formatter> : fmt::formatter -struct fmt::formatter> : fmt::formatter> { +struct fmt::formatter> : fmt::formatter> { format_context::iterator format(const celerity::subrange& sr, format_context& ctx) const { - return fmt::formatter>::format(celerity::detail::box(sr), ctx); + auto out = ctx.out(); + out = formatter>::format(sr.offset, ctx); + out = std::copy_n(" + ", 3, out); + out = formatter>::format(celerity::id(sr.range), ctx); // cast to id to avoid multiple inheritance + return out; } }; diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc index 3de14a09f..0b45e41dc 100644 --- a/test/print_graph_tests.cc +++ b/test/print_graph_tests.cc @@ -43,11 +43,11 @@ TEST_CASE("task-graph printing is unchanged", "[print_graph][task-graph]") { // replace the `expected` value with the new dot graph. const std::string expected = "digraph G {label=\"Task Graph\" 0[shape=ellipse label=epoch>];1[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
discard_write B1 {[0,0,0] - [1,1,1]}>];0->1[color=orchid];2[shape=box style=rounded " - "label=device-compute [0,0,0] - [64,1,1]
discard_write B0 {[0,0,0] - " - "[64,1,1]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
(R1) " + "
device-compute [0,0,0] + [64,1,1]
discard_write B1 {[0,0,0] - [1,1,1]}>];0->1[color=orchid];2[shape=box style=rounded " + "label=device-compute [0,0,0] + [64,1,1]
discard_write B0 {[0,0,0] - " + "[64,1,1]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] + [64,1,1]
(R1) " "read_write B1 {[0,0,0] - [1,1,1]}
read B0 {[0,0,0] - [64,1,1]}>];1->3[];2->3[];4[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
read B1 {[0,0,0] - [1,1,1]}>];3->4[];}"; + "\"task_consume_5\"
device-compute [0,0,0] + [64,1,1]
read B1 {[0,0,0] - [1,1,1]}>];3->4[];}"; CHECK(print_task_graph(tt.trec) == expected); } @@ -78,17 +78,17 @@ TEST_CASE("command graph printing is unchanged", "[print_graph][command-graph]") const std::string expected = "digraph G{label=\"Command Graph\" subgraph cluster_id_0_0{label=<T0 (epoch)>;color=darkgray;id_0_0[label=epoch> fontcolor=black shape=box];}subgraph cluster_id_0_1{label=<T1 \"reduce_8\" " - "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] - [1,1,1]
(R1) discard_write B0 {[0,0,0] - " + "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] + [1,1,1]
(R1) discard_write B0 {[0,0,0] - " "[1,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 \"consume_9\" " - "(device-compute)>;color=darkgray;id_0_2[label=execution [0,0,0] - [1,1,1]
read B0 {[0,0,0] - " + "(device-compute)>;color=darkgray;id_0_2[label=execution [0,0,0] + [1,1,1]
read B0 {[0,0,0] - " "[1,1,1]}
read_write B0 {[0,0,0] - [1,1,1]}
write B0 {[0,0,0] - [1,1,1]}> fontcolor=black " "shape=box];}id_0_0->id_0_1[color=orchid];id_0_3->id_0_2[];id_0_5->id_0_2[color=limegreen];id_0_6->id_0_2[color=limegreen];id_0_7->id_0_2[color=" "limegreen];id_0_3[label=reduction R1
B0 {[0,0,0] - [1,1,1]}> fontcolor=black " "shape=ellipse];id_0_1->id_0_3[];id_0_4->id_0_3[];id_0_4[label=(R1) await push transfer 8589934592
BB0 {[0,0,0] - " "[1,1,1]}> fontcolor=black shape=ellipse];id_0_0->id_0_4[color=orchid];id_0_5[label=(R1) push transfer 8589934593 to N1
BB0 " - "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_5[];id_0_6[label=(R1) push transfer 8589934594 to N2
BB0 " - "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_6[];id_0_7[label=(R1) push transfer 8589934595 to N3
BB0 " - "[0,0,0] - [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_7[];}"; + "[0,0,0] + [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_5[];id_0_6[label=(R1) push transfer 8589934594 to N2
BB0 " + "[0,0,0] + [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_6[];id_0_7[label=(R1) push transfer 8589934595 to N3
BB0 " + "[0,0,0] + [1,1,1]> fontcolor=black shape=ellipse];id_0_1->id_0_7[];}"; // fully check node 0 const auto dot0 = dctx.print_command_graph(0); @@ -159,11 +159,11 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY SECTION("task graph") { const auto* expected = "digraph G {label=\"Task Graph\" 0[shape=ellipse label=epoch>];1[shape=box style=rounded label=device-compute [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];0->1[color=orchid];2[shape=ellipse " + "
device-compute [0,0,0] + [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];0->1[color=orchid];2[shape=ellipse " "label=horizon>];1->2[color=orange];3[shape=box style=rounded label=device-compute " - "[0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];1->3[];4[shape=ellipse " + "[0,0,0] + [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];1->3[];4[shape=ellipse " "label=horizon>];3->4[color=orange];2->4[color=orange];5[shape=box style=rounded label=device-compute [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];3->5[];6[shape=ellipse " + "
device-compute [0,0,0] + [16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}>];3->5[];6[shape=ellipse " "label=horizon>];5->6[color=orange];4->6[color=orange];7[shape=ellipse label=epoch>];6->7[color=orange];}"; CHECK(runtime_testspy::print_task_graph(celerity::detail::runtime::get_instance()) == expected); @@ -173,13 +173,13 @@ TEST_CASE_METHOD(test_utils::runtime_fixture, "full graph is printed if CELERITY const auto* expected = "digraph G{label=\"Command Graph\" subgraph cluster_id_0_0{label=<T0 (epoch)>;color=darkgray;id_0_0[label=epoch> fontcolor=black shape=box];}subgraph cluster_id_0_1{label=<T1 \"full_graph_printing_17\" " - "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] - [16,1,1]
read_write B0 {[0,0,0] - " + "(device-compute)>;color=darkgray;id_0_1[label=execution [0,0,0] + [16,1,1]
read_write B0 {[0,0,0] - " "[16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_2{label=<T2 " "(horizon)>;color=darkgray;id_0_2[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_3{label=<T3 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_3[label=execution [0,0,0] - " + "color=\"#606060\">T3 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_3[label=execution [0,0,0] + " "[16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_4{label=<T4 " "(horizon)>;color=darkgray;id_0_4[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_5{label=<T5 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_5[label=execution [0,0,0] - " + "color=\"#606060\">T5 \"full_graph_printing_17\" (device-compute)>;color=darkgray;id_0_5[label=execution [0,0,0] + " "[16,1,1]
read_write B0 {[0,0,0] - [16,1,1]}> fontcolor=black shape=box];}subgraph cluster_id_0_6{label=<T6 " "(horizon)>;color=darkgray;id_0_6[label=horizon> fontcolor=black shape=box];}subgraph cluster_id_0_7{label=<T7 (epoch)>;color=darkgray;id_0_7[label=epoch (barrier)> fontcolor=black " From c6ce3e6d087a45e103355e052da9b54590065d97 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Thu, 14 Sep 2023 09:12:16 +0200 Subject: [PATCH 16/17] Update benchmark resuts for grid re-implementation --- ci/perf/gpuc2_bench.csv | 233 ++++++++++++++++++++++++--------------- ci/perf/gpuc2_bench.md | 235 +++++++++++++++++++++++++--------------- 2 files changed, 289 insertions(+), 179 deletions(-) diff --git a/ci/perf/gpuc2_bench.csv b/ci/perf/gpuc2_bench.csv index ba7caa9fc..b77098a77 100644 --- a/ci/perf/gpuc2_bench.csv +++ b/ci/perf/gpuc2_bench.csv @@ -1,90 +1,145 @@ test case,benchmark name,samples,iterations,estimated,mean,low mean,high mean,std dev,low std dev,high std dev,raw -benchmark intrusive graph dependency handling with N nodes - 1,creating nodes,100,5345,2138000,4.5603,4.4790,4.9647,0.8061,0.0038,1.9240,"4.4797,4.4814,4.4834,4.4814,4.4797,4.4795,4.4685,4.4814,4.4797,4.4814,4.4816,4.4795,4.4683,4.4816,12.5809,4.4816,4.4683,4.4797,4.4814,4.4816,4.4795,4.4797,4.4814,4.4816,4.4814,4.4816,4.4664,4.4795,4.4795,4.4814,4.4816,4.4814,4.4797,4.4795,4.4816,4.4814,4.4685,4.4795,4.4816,4.4814,4.4797,4.4814,4.4816,4.4814,4.4797,4.4795,4.4683,4.4816,4.4795,4.4816,4.4814,4.4797,4.4814,4.4816,4.4814,4.4816,4.4795,4.4685,4.4795,4.4797,4.4795,4.4797,4.4814,4.4814,4.4795,4.4795,4.4814,4.4683,4.4797,4.4795,4.4816,4.4814,4.4816,4.4795,4.4816,4.4814,4.4797,4.4664,4.4814,4.4814,4.4795,4.4795,4.4814,4.4816,4.4814,4.4816,4.4795,4.4685,4.4795,4.4797,4.4795,4.4816,4.4814,4.4797,4.4814,4.4797,4.4814,4.4683,4.4797,4.4795" -benchmark intrusive graph dependency handling with N nodes - 1,creating and adding 
dependencies,100,1035,2380500,22.8967,22.8783,22.9124,0.0866,0.0751,0.1020,"22.6406,22.9401,23.0077,22.9691,22.9787,22.8531,22.9787,22.7855,22.9498,22.8145,22.8145,22.8435,22.9498,22.9787,22.8531,22.8918,22.9691,22.9787,22.9498,22.9691,22.9594,22.8628,22.8232,22.7845,22.9691,22.9797,22.8918,22.9691,22.9787,22.9401,22.9691,22.8918,22.8725,22.9401,22.6986,22.9594,22.9208,22.9014,22.9884,22.8242,22.9401,22.9594,22.8821,22.8628,22.9594,22.8145,22.9691,22.9797,22.8821,22.8522,22.9787,22.9498,22.9787,22.9594,22.8725,22.9691,22.9401,22.8145,22.9401,22.8242,22.8725,22.9691,22.9401,22.7758,22.7758,22.8628,22.9691,22.9401,22.9884,22.9208,22.9787,22.9208,22.8232,22.7758,22.9401,22.9691,22.9700,22.9787,22.7758,22.9691,22.8531,22.9014,22.8628,22.8039,22.7855,22.9498,22.9691,22.8618,22.9498,22.7072,22.9498,22.9787,22.9797,22.7556,22.9498,22.6396,22.9208,22.7855,22.6686,22.7082" -benchmark intrusive graph dependency handling with N nodes - 1,adding and removing dependencies,100,1535,2302500,15.7554,15.7152,15.7980,0.2115,0.1888,0.2386,"16.1068,15.4808,16.2313,15.6964,15.8007,15.9316,15.5459,15.4547,16.1792,15.7094,15.6443,15.6176,15.9446,15.7811,15.5134,15.8137,15.7160,15.7029,15.5068,15.7811,15.8007,16.1270,15.8853,15.4873,15.5596,15.4547,15.6958,16.1003,15.7160,15.9121,15.8007,15.7746,15.8007,15.8007,15.8853,16.0684,15.5329,15.7811,15.7225,15.8984,16.0287,15.9642,15.8007,15.8072,15.7811,15.5394,15.9577,15.8137,15.5524,15.8007,15.4612,15.7355,15.6052,15.6046,15.7420,15.4547,15.7876,15.8007,15.6182,16.0749,16.0026,15.7225,15.4547,15.7420,15.8919,15.7609,15.6371,15.4547,15.8007,16.1857,15.4873,15.6111,15.7420,15.4547,15.4873,16.0945,15.4808,15.4873,16.0358,15.7746,16.2704,15.7479,15.9707,16.1661,15.6313,15.5003,15.7746,15.7290,16.0814,15.6958,16.0489,15.4873,15.4547,15.4879,15.6371,15.7225,15.9440,15.7870,15.4547,15.7479" -benchmark intrusive graph dependency handling with N nodes - 1,checking for 
dependencies,100,13932,1393200,1.6970,1.6963,1.6975,0.0031,0.0026,0.0035,"1.6906,1.6984,1.6920,1.6992,1.6985,1.6985,1.6985,1.6905,1.6985,1.6985,1.6985,1.6985,1.6985,1.6906,1.6977,1.6985,1.6985,1.6985,1.6905,1.6985,1.6985,1.6977,1.6985,1.6906,1.6977,1.6985,1.6985,1.6985,1.6985,1.6905,1.6985,1.6985,1.6985,1.6985,1.6906,1.6985,1.6985,1.6985,1.6985,1.6992,1.6905,1.6985,1.6985,1.6985,1.6985,1.6906,1.6985,1.6992,1.6985,1.6985,1.6984,1.6905,1.6985,1.6992,1.6985,1.6985,1.6906,1.6985,1.6985,1.6992,1.6984,1.6984,1.6905,1.6985,1.6992,1.6985,1.6985,1.6906,1.6977,1.6985,1.6977,1.6984,1.6905,1.6985,1.6985,1.6985,1.6985,1.6985,1.6906,1.6985,1.6985,1.6984,1.6984,1.6906,1.6985,1.6985,1.6985,1.6985,1.6985,1.6906,1.6977,1.6984,1.6984,1.6977,1.6906,1.6985,1.6985,1.6985,1.6985,1.6985" -benchmark intrusive graph dependency handling with N nodes - 10,creating nodes,100,648,2332800,39.4194,39.3444,39.4957,0.3860,0.3634,0.4100,"39.8565,38.9753,39.7006,39.4537,39.3611,39.9491,39.9336,39.9336,39.8410,39.0833,39.7176,39.9336,39.9336,39.9491,39.6867,38.9907,38.8673,39.0062,39.7948,39.9645,39.9336,39.9352,39.5926,38.9753,39.0525,39.8565,39.0679,39.0062,38.9907,39.0062,39.0062,38.9907,39.0370,39.8410,39.9491,39.1605,39.0062,39.5000,39.9506,39.6698,39.1312,39.9336,39.8565,39.0988,38.9907,38.9907,39.0062,39.2840,39.7485,39.0370,39.8565,39.0370,39.7330,39.3148,39.5000,39.5633,38.9907,39.0062,38.8827,39.0062,39.7639,39.2685,39.5170,39.5309,39.2701,39.9491,39.9028,38.8827,38.9907,38.9753,39.5772,39.4707,39.3302,39.7485,39.0525,39.8410,39.9491,39.1296,39.0062,39.0062,39.3765,39.6867,39.1296,39.9491,38.8827,39.8256,39.9336,39.9491,39.4691,38.9753,39.0062,39.0525,39.8410,39.0525,39.7639,39.3148,39.5000,39.5633,38.9907,39.0833" -benchmark intrusive graph dependency handling with N nodes - 10,creating and adding 
dependencies,100,101,2393700,244.4994,244.4219,244.5766,0.3922,0.3441,0.4566,"244.0099,243.5149,245.5941,244.8020,244.7030,245.0000,243.6139,243.8119,244.5050,244.4950,244.4059,245.1980,243.8119,244.8020,244.6040,244.6040,244.2079,244.3069,244.4950,243.9010,244.8020,244.9010,243.7129,244.4059,244.8020,245.1980,244.2079,243.9010,244.8911,245.0000,244.0099,244.9010,244.4059,244.1089,244.8020,244.7030,244.3069,244.3069,244.2970,245.0990,243.9109,244.6040,244.7030,244.6040,245.0000,244.9010,244.1089,244.3069,244.8020,244.8020,244.6931,244.3960,243.9109,244.3069,244.7030,245.2970,244.9010,244.8020,244.3069,244.7030,244.3069,244.3960,244.4059,244.8020,244.9010,245.3960,244.6040,244.1089,244.8020,244.7030,244.5050,244.0099,244.7030,244.4950,244.2970,244.8020,243.7129,244.2079,244.5050,244.6040,244.9010,244.5050,244.3960,244.2970,244.8020,244.6040,244.2079,244.0099,244.0099,244.3960,244.6931,244.3069,244.3069,244.7030,244.3069,244.4059,244.5050,244.4059,244.6040,244.3960" -benchmark intrusive graph dependency handling with N nodes - 10,adding and removing 
dependencies,100,107,2396800,201.9213,201.3884,203.9418,4.7389,1.0585,11.0153,"200.1682,200.1682,204.2056,202.3271,202.3271,202.3271,202.3271,202.2336,202.2336,202.3271,202.3271,202.3271,202.3271,202.3271,202.3271,202.2336,202.3271,202.3364,202.3271,202.3271,202.3271,202.3271,202.2336,202.2336,202.3271,202.3271,202.3271,202.3271,202.3271,202.2336,202.2336,202.2336,202.3271,202.3271,202.3364,202.3271,202.2336,202.2336,202.2336,202.3271,202.3271,202.3271,202.3271,202.3271,202.2336,202.2336,202.3271,202.3271,202.3271,202.3271,202.3271,202.3271,202.2336,202.3271,202.3271,202.3271,202.3271,202.3271,202.3271,202.3271,202.3364,247.8318,200.1776,200.1682,200.0841,200.1776,200.1682,200.1776,200.1776,200.1682,200.1776,200.1776,200.1682,200.0841,200.1776,200.1682,200.1776,200.0841,200.1682,200.1682,200.1776,200.1776,200.1682,200.1776,200.1776,200.1682,200.0841,200.1776,200.1682,200.1776,200.0841,200.1682,200.1776,200.1776,200.1682,200.1776,200.1776,200.1682,200.1776,200.0841" -benchmark intrusive graph dependency handling with N nodes - 10,checking for dependencies,100,920,2300000,26.6400,26.4670,26.7990,0.8479,0.7637,0.9532,"27.1141,27.1141,27.1261,25.3717,25.3609,25.3511,25.3500,25.3500,25.3500,25.3500,25.3500,25.3609,25.3511,25.3500,25.3500,25.3511,25.3500,25.3500,25.3500,25.3620,25.3500,25.3500,25.3511,25.3500,25.3500,25.3500,25.3500,25.3500,25.3500,25.3511,25.3500,25.3500,28.9772,27.0489,27.6696,27.5511,27.1467,27.1467,27.1370,27.1359,27.1478,27.0489,27.1141,27.1261,27.1250,27.1250,27.1141,27.1141,27.0489,27.1250,27.1250,27.1152,27.1141,27.1141,27.1261,27.0489,27.1141,27.1141,27.4196,27.1250,27.2674,27.1141,27.0707,27.1033,27.1250,27.1152,27.1250,27.1250,27.1261,27.1141,27.0489,27.1261,27.1141,27.1152,27.1250,27.1141,27.1152,27.0489,27.1250,27.1152,27.1141,27.1250,27.1141,27.1141,27.0489,27.1250,27.4848,27.1250,27.1261,27.1250,27.1141,27.0489,27.1250,27.1141,27.1141,27.3543,27.1250,27.1141,27.1250,27.0489" -benchmark intrusive graph dependency handling with N nodes - 
100,creating nodes,100,61,2403400,399.3433,398.9120,400.2339,3.0436,1.6131,5.6087,"397.6066,396.7869,405.1639,400.4098,399.2459,398.9180,400.4098,399.9016,399.5738,399.9180,398.7541,399.0820,400.3934,399.5902,396.1311,400.2295,399.9016,399.7541,400.2295,399.0820,399.4262,399.2459,400.0656,400.4098,399.9016,398.5902,398.9180,397.4426,397.6066,398.4426,400.3934,399.9016,399.5902,398.9180,398.9180,398.5902,399.7377,398.5902,398.0984,399.0820,399.9016,399.9180,400.2295,399.9016,400.9016,400.2295,400.2459,400.2295,399.7377,399.7541,398.2623,399.0820,399.9016,399.5902,398.7541,397.7705,399.7377,399.0820,399.4262,399.7377,398.7541,399.5738,399.0820,398.7541,398.9180,399.5738,399.5902,399.0820,399.2459,399.9180,400.0656,399.9016,400.0820,399.7377,398.2623,402.7049,400.0656,400.0656,398.9344,421.7377,413.1967,396.9508,396.9508,396.6230,396.2951,396.1311,398.2623,394.6557,397.2787,398.2623,397.2787,397.1148,396.7869,396.6230,397.4426,396.7869,397.6066,398.2623,394.6557,397.7705" -benchmark intrusive graph dependency handling with N nodes - 100,creating and adding 
dependencies,100,6,2706600,4518.2033,4511.0400,4546.0633,64.2809,10.9065,150.4837,"4505.0000,4514.8333,4608.3333,4505.0000,4498.1667,4518.1667,4503.3333,4513.1667,4506.5000,4513.3333,4516.5000,4505.0000,4498.1667,4501.5000,4516.6667,4516.5000,4486.5000,4491.5000,4506.6667,4496.5000,4503.1667,4518.3333,4503.1667,4503.1667,4505.0000,4494.8333,4508.1667,4518.3333,4494.8333,4518.1667,4515.0000,4508.1667,4513.1667,4516.5000,4508.1667,4536.6667,4511.5000,4525.0000,4509.8333,4508.3333,4483.1667,4513.1667,4498.1667,4505.0000,4509.8333,4489.8333,4506.6667,4514.8333,4513.1667,4518.3333,4519.8333,4510.0000,4508.1667,4533.3333,4519.8333,4520.0000,4526.5000,4531.6667,5142.6667,4511.5000,4513.1667,4513.3333,4523.1667,4533.3333,4526.5000,4521.5000,4511.5000,4506.6667,4518.1667,4511.6667,4516.5000,4508.1667,4516.5000,4499.8333,4501.5000,4501.5000,4513.1667,4516.6667,4523.1667,4528.3333,4519.8333,4511.6667,4504.8333,4511.5000,4505.0000,4509.8333,4499.8333,4503.3333,4496.5000,4513.1667,4518.3333,4514.8333,4506.5000,4515.0000,4508.1667,4493.1667,4513.3333,4519.8333,4523.3333,4511.5000" -benchmark intrusive graph dependency handling with N nodes - 100,adding and removing 
dependencies,100,5,2464000,4873.3520,4870.0900,4876.7120,16.8722,14.7902,19.5522,"4866.8000,4861.0000,4890.8000,4868.8000,4873.0000,4906.8000,4862.8000,4860.8000,4836.8000,4842.8000,4870.8000,4864.8000,4840.8000,4862.8000,4885.0000,4904.8000,4874.8000,4838.8000,4838.8000,4895.0000,4860.8000,4844.8000,4862.8000,4878.8000,4896.8000,4886.8000,4877.0000,4862.8000,4886.8000,4864.8000,4880.8000,4842.8000,4858.8000,4858.8000,4880.8000,4874.8000,4860.8000,4876.8000,4883.0000,4874.8000,4884.8000,4864.8000,4866.8000,4902.8000,4862.8000,4865.0000,4886.8000,4882.8000,4881.0000,4920.8000,4862.8000,4862.8000,4848.8000,4876.8000,4895.0000,4868.8000,4878.8000,4870.8000,4882.8000,4898.8000,4877.0000,4868.8000,4900.8000,4875.0000,4912.8000,4869.0000,4864.8000,4870.8000,4886.8000,4844.6000,4866.8000,4864.8000,4874.8000,4868.8000,4861.0000,4844.8000,4894.8000,4886.8000,4890.8000,4878.8000,4862.8000,4846.8000,4891.0000,4872.8000,4876.8000,4890.8000,4868.8000,4901.0000,4882.8000,4878.8000,4883.0000,4892.8000,4870.8000,4868.8000,4858.8000,4858.8000,4884.8000,4883.0000,4864.8000,4868.8000" -benchmark intrusive graph dependency handling with N nodes - 100,checking for 
dependencies,100,14,2550800,1774.3007,1773.6500,1775.0886,3.6600,2.9942,5.3685,"1776.0714,1773.2143,1793.2143,1771.0714,1777.5714,1774.6429,1770.3571,1773.2143,1773.2143,1771.0714,1767.5000,1773.2143,1771.0714,1771.7857,1773.2143,1772.5000,1770.3571,1766.0714,1770.3571,1771.7857,1773.9286,1771.7857,1774.7143,1773.2143,1769.6429,1774.6429,1774.6429,1773.9286,1771.7857,1773.9286,1771.7857,1767.5000,1771.7857,1772.5000,1773.2143,1773.9286,1774.6429,1771.7857,1770.3571,1776.0714,1774.6429,1773.9286,1772.5000,1772.5714,1773.9286,1771.0714,1776.0714,1773.2143,1771.7857,1774.6429,1773.9286,1778.2143,1773.9286,1778.9286,1779.7143,1777.5000,1771.7857,1773.2143,1772.5000,1770.3571,1774.7143,1774.6429,1780.3571,1780.3571,1778.2143,1778.9286,1774.6429,1780.4286,1779.6429,1776.0714,1776.7857,1772.5000,1776.0714,1766.7857,1771.7857,1778.2143,1779.6429,1776.8571,1773.9286,1773.2143,1770.3571,1777.5000,1776.8571,1774.6429,1773.2143,1777.5000,1777.5000,1768.2143,1774.7143,1778.9286,1776.0714,1775.3571,1774.7143,1774.6429,1768.9286,1776.0714,1778.2143,1778.2857,1777.5000,1775.3571" -benchmark task handling > without access thread,generating and deleting 
tasks,100,1,354260400,3515069.0400,3468106.6700,3569289.2400,256538.2472,191168.9214,358529.1577,"3532250.0000,3577827.0000,4176841.0000,3652959.0000,3544363.0000,3589400.0000,3549683.0000,3551126.0000,3598175.0000,3570783.0000,3538803.0000,3597835.0000,3544383.0000,3556666.0000,3580922.0000,3543792.0000,3532932.0000,3540767.0000,3584208.0000,3536347.0000,3581424.0000,3534684.0000,3540296.0000,3587796.0000,3535907.0000,3539624.0000,3077318.0000,2964445.0000,2971447.0000,2961840.0000,2958753.0000,2990684.0000,2965897.0000,4622425.0000,3243954.0000,2981057.0000,2964164.0000,2967350.0000,2950958.0000,3519185.0000,3524024.0000,3590661.0000,3538061.0000,3593887.0000,3541728.0000,3570453.0000,3537550.0000,3535316.0000,3528253.0000,3583497.0000,3525797.0000,3571645.0000,3524676.0000,3529705.0000,3564101.0000,3530827.0000,3520057.0000,3574480.0000,3534455.0000,3534835.0000,3569020.0000,3527211.0000,3529485.0000,3569941.0000,3534344.0000,3526440.0000,3577085.0000,3533363.0000,3591442.0000,3541888.0000,3532381.0000,3585181.0000,3527351.0000,3574590.0000,3524526.0000,3535105.0000,3534194.0000,3603325.0000,3529736.0000,3582776.0000,3529385.0000,3600249.0000,3520417.0000,3536419.0000,3572617.0000,3532761.0000,3527391.0000,3596952.0000,3532340.0000,3532451.0000,3578949.0000,3533071.0000,3542760.0000,3541988.0000,3578208.0000,3536138.0000,3565463.0000,3538592.0000,4728867.0000,3539224.0000" -benchmark task handling > with access thread,generating and deleting tasks with access 
thread,100,1,797709100,7985335.8200,7951017.2000,8023190.8300,182135.7661,128245.7361,254802.3158,"7899705.0000,7972753.0000,8146262.0000,8073314.0000,8027978.0000,7978704.0000,8008851.0000,7973815.0000,8030903.0000,8007419.0000,7984244.0000,7993593.0000,8003982.0000,7954348.0000,8002409.0000,8353404.0000,7970829.0000,7982802.0000,7994484.0000,7950231.0000,7881270.0000,7935272.0000,8367912.0000,7974656.0000,7987350.0000,7977974.0000,7973885.0000,8000716.0000,7873395.0000,7323502.0000,7368850.0000,7635594.0000,7358720.0000,7858116.0000,7962354.0000,7922347.0000,7957094.0000,8011506.0000,7950983.0000,7921886.0000,7961823.0000,8813977.0000,7952544.0000,8037687.0000,7949018.0000,7969967.0000,7967082.0000,8005886.0000,7926636.0000,7935172.0000,8176579.0000,8172723.0000,7903601.0000,7953146.0000,8013290.0000,7963375.0000,7964257.0000,7995256.0000,7963565.0000,7977743.0000,8007258.0000,7952505.0000,7955310.0000,8135321.0000,8763742.0000,8384232.0000,7970048.0000,7942486.0000,7934911.0000,8024812.0000,7997750.0000,7964317.0000,7964588.0000,7995997.0000,7952305.0000,8008370.0000,7956001.0000,7951874.0000,7896669.0000,7983404.0000,7992230.0000,8013019.0000,7949740.0000,7970469.0000,7969366.0000,8013401.0000,8004713.0000,7957555.0000,8000756.0000,7975869.0000,7967854.0000,7973104.0000,7975839.0000,7962794.0000,8020384.0000,7988663.0000,7942806.0000,7975027.0000,8004063.0000,7977291.0000" -generating large task graphs,soup 
topology,100,1,127355800,1319213.2700,1316264.4700,1323382.8500,17701.2835,13477.7069,21976.8459,"1312415.0000,1308607.0000,1330309.0000,1316482.0000,1311022.0000,1371767.0000,1311763.0000,1310732.0000,1317153.0000,1312956.0000,1313607.0000,1312915.0000,1310391.0000,1309669.0000,1360496.0000,1313297.0000,1312625.0000,1316372.0000,1313747.0000,1310100.0000,1373881.0000,1312996.0000,1310140.0000,1317635.0000,1310771.0000,1313026.0000,1373400.0000,1314048.0000,1316061.0000,1317024.0000,1310832.0000,1309900.0000,1315110.0000,1312835.0000,1311383.0000,1350647.0000,1313055.0000,1310301.0000,1315831.0000,1312264.0000,1310531.0000,1317204.0000,1310812.0000,1311402.0000,1374011.0000,1312554.0000,1308668.0000,1314959.0000,1311524.0000,1310851.0000,1358693.0000,1310611.0000,1310601.0000,1316703.0000,1313607.0000,1312715.0000,1317384.0000,1315651.0000,1312975.0000,1364453.0000,1311413.0000,1311954.0000,1319728.0000,1313137.0000,1312324.0000,1318446.0000,1314217.0000,1311193.0000,1371727.0000,1313516.0000,1311473.0000,1315240.0000,1311272.0000,1309348.0000,1313647.0000,1311733.0000,1311312.0000,1361938.0000,1309790.0000,1313978.0000,1315621.0000,1313527.0000,1308988.0000,1317514.0000,1309269.0000,1308136.0000,1351809.0000,1312265.0000,1311182.0000,1314649.0000,1312324.0000,1309900.0000,1309930.0000,1313056.0000,1309399.0000,1310341.0000,1376556.0000,1310741.0000,1312806.0000,1312454.0000" -generating large task graphs,chain 
topology,100,1,4038600,42958.7400,42367.1200,45835.2300,5733.8169,198.4669,13671.5538,"42238.0000,42489.0000,44272.0000,42809.0000,42529.0000,42719.0000,42429.0000,42498.0000,42048.0000,42168.0000,42098.0000,42258.0000,42238.0000,42088.0000,42188.0000,42148.0000,42058.0000,42108.0000,42208.0000,42378.0000,42218.0000,42127.0000,42048.0000,42278.0000,42168.0000,42459.0000,42028.0000,42027.0000,42128.0000,42168.0000,42248.0000,42308.0000,42048.0000,42298.0000,42158.0000,42358.0000,42449.0000,42288.0000,42348.0000,42419.0000,42518.0000,42187.0000,42398.0000,42499.0000,42498.0000,42389.0000,42398.0000,42569.0000,42328.0000,42469.0000,42448.0000,42749.0000,42529.0000,42599.0000,42388.0000,42639.0000,42268.0000,42599.0000,42559.0000,42659.0000,42499.0000,42478.0000,42158.0000,42379.0000,42258.0000,42629.0000,42618.0000,42288.0000,42438.0000,42158.0000,42469.0000,99947.0000,42649.0000,42339.0000,42729.0000,42639.0000,42729.0000,42528.0000,42449.0000,42579.0000,42328.0000,42639.0000,42388.0000,42599.0000,42328.0000,42468.0000,42459.0000,42328.0000,42238.0000,42378.0000,42258.0000,42298.0000,42048.0000,42468.0000,42228.0000,42148.0000,42419.0000,42138.0000,42318.0000,42208.0000" -generating large task graphs,expanding tree 
topology,100,1,6307300,57518.5500,56297.6600,59145.9000,7157.0705,5626.4276,8596.9044,"75651.0000,75411.0000,59140.0000,55083.0000,54721.0000,54671.0000,54872.0000,54772.0000,54241.0000,54321.0000,54532.0000,54491.0000,54421.0000,54031.0000,54361.0000,54421.0000,54161.0000,54431.0000,53950.0000,54151.0000,54371.0000,54351.0000,54452.0000,54481.0000,54411.0000,54391.0000,54472.0000,54351.0000,54551.0000,54441.0000,54561.0000,54481.0000,54281.0000,54482.0000,54331.0000,54120.0000,54091.0000,54321.0000,54151.0000,54932.0000,54902.0000,54391.0000,54552.0000,54782.0000,54241.0000,54922.0000,54972.0000,54491.0000,54311.0000,54872.0000,54581.0000,54702.0000,54722.0000,54671.0000,54402.0000,54471.0000,54641.0000,65682.0000,54912.0000,54973.0000,54731.0000,54522.0000,54461.0000,54932.0000,54722.0000,54662.0000,54451.0000,54471.0000,54281.0000,54592.0000,54391.0000,54712.0000,54120.0000,54311.0000,54442.0000,54802.0000,55252.0000,55063.0000,55062.0000,54842.0000,54862.0000,54702.0000,54722.0000,54672.0000,54531.0000,54562.0000,54431.0000,54922.0000,60523.0000,75771.0000,75732.0000,76313.0000,75902.0000,75491.0000,75611.0000,75291.0000,75982.0000,75581.0000,75491.0000,75892.0000" -generating large task graphs,contracting tree 
topology,100,1,10180800,101296.3100,100631.7900,104318.8900,6214.3051,633.5439,14749.6369,"100468.0000,101020.0000,105888.0000,101421.0000,101620.0000,101451.0000,100829.0000,101851.0000,100488.0000,101110.0000,100729.0000,100388.0000,100419.0000,100648.0000,101209.0000,100599.0000,100939.0000,100328.0000,100308.0000,100488.0000,101279.0000,100728.0000,101099.0000,100298.0000,162687.0000,101109.0000,100329.0000,100157.0000,100648.0000,100298.0000,100558.0000,101541.0000,100218.0000,100839.0000,100438.0000,100499.0000,100759.0000,100088.0000,100127.0000,100588.0000,100348.0000,100008.0000,100789.0000,100619.0000,100028.0000,100338.0000,101119.0000,100628.0000,100118.0000,100268.0000,100849.0000,100178.0000,100309.0000,99857.0000,100378.0000,100238.0000,100469.0000,100097.0000,100458.0000,100198.0000,100398.0000,100469.0000,100989.0000,103965.0000,100890.0000,101530.0000,100639.0000,100939.0000,100379.0000,100718.0000,100268.0000,100939.0000,100448.0000,100328.0000,100638.0000,99757.0000,100959.0000,100248.0000,99968.0000,100308.0000,100108.0000,100839.0000,100659.0000,100298.0000,100919.0000,100619.0000,100388.0000,101050.0000,100438.0000,100459.0000,100989.0000,100268.0000,100078.0000,100469.0000,100127.0000,100719.0000,100599.0000,100438.0000,100949.0000,100659.0000" -generating large task graphs,wave_sim 
topology,100,1,41244000,400732.2500,395256.9800,405239.2400,25301.7394,20555.7216,30099.4181,"408442.0000,407230.0000,363907.0000,348879.0000,343670.0000,347396.0000,343619.0000,343599.0000,342708.0000,342797.0000,342918.0000,342647.0000,342948.0000,342517.0000,342998.0000,342898.0000,343208.0000,444771.0000,408302.0000,408101.0000,407570.0000,406999.0000,407580.0000,406839.0000,408312.0000,407921.0000,406949.0000,410856.0000,407049.0000,407380.0000,407220.0000,408291.0000,407410.0000,408932.0000,408993.0000,408862.0000,411598.0000,408102.0000,407129.0000,408302.0000,408993.0000,407430.0000,407710.0000,408662.0000,408312.0000,408652.0000,462454.0000,408893.0000,408472.0000,408212.0000,408231.0000,408282.0000,407700.0000,409204.0000,407720.0000,408432.0000,412219.0000,408802.0000,408793.0000,408191.0000,408783.0000,407390.0000,407360.0000,409363.0000,408352.0000,408202.0000,453838.0000,408411.0000,407630.0000,408121.0000,407811.0000,407751.0000,407309.0000,407920.0000,408331.0000,413983.0000,408592.0000,407220.0000,407811.0000,406989.0000,409003.0000,406248.0000,408051.0000,407951.0000,408242.0000,412389.0000,408722.0000,407951.0000,408652.0000,408502.0000,408411.0000,408532.0000,407189.0000,408282.0000,408141.0000,462835.0000,409384.0000,408572.0000,406829.0000,406959.0000" -generating large task graphs,jacobi 
topology,100,1,13703000,136948.9900,136304.0500,139939.1700,6074.1708,540.4682,14436.1955,"136697.0000,135916.0000,139432.0000,136417.0000,136236.0000,136096.0000,136116.0000,136597.0000,136537.0000,136527.0000,136376.0000,135946.0000,136056.0000,136366.0000,136457.0000,136827.0000,136597.0000,139142.0000,136477.0000,136095.0000,135945.0000,136086.0000,136287.0000,135965.0000,136056.0000,136317.0000,136176.0000,136176.0000,136366.0000,136217.0000,136286.0000,136026.0000,135575.0000,135585.0000,136166.0000,136126.0000,136587.0000,136186.0000,135805.0000,136296.0000,136587.0000,136216.0000,136386.0000,135965.0000,136096.0000,136487.0000,197071.0000,136576.0000,135866.0000,135865.0000,136267.0000,136727.0000,136436.0000,135925.0000,136286.0000,136487.0000,136016.0000,136006.0000,136186.0000,135775.0000,136717.0000,135926.0000,135735.0000,136487.0000,136196.0000,136186.0000,136086.0000,136437.0000,136036.0000,135855.0000,136607.0000,135996.0000,135805.0000,136337.0000,135665.0000,139572.0000,136227.0000,136266.0000,136537.0000,136476.0000,137088.0000,136737.0000,136096.0000,135936.0000,136166.0000,136717.0000,136497.0000,136477.0000,136035.0000,136236.0000,136767.0000,136116.0000,135455.0000,136055.0000,136606.0000,136156.0000,137157.0000,136596.0000,136176.0000,136316.0000" -generating large command graphs for N nodes - 1,soup 
topology,100,1,206500900,1958748.1500,1920960.8900,1990848.5800,176232.3246,154547.8744,192702.3865,"2053659.0000,2058388.0000,1660033.0000,1653852.0000,1658490.0000,1655124.0000,1660023.0000,1653661.0000,1667557.0000,1658981.0000,1656637.0000,1922300.0000,2054581.0000,2096560.0000,2052768.0000,2063818.0000,2054200.0000,2118392.0000,2049781.0000,2059921.0000,2051345.0000,2059369.0000,2101730.0000,2057496.0000,2055202.0000,2052306.0000,2056654.0000,2054660.0000,2093635.0000,2052877.0000,2056113.0000,2054420.0000,2059169.0000,2050423.0000,2114594.0000,2049802.0000,2056805.0000,2051425.0000,2100788.0000,2050674.0000,2057496.0000,2050914.0000,2110586.0000,2053658.0000,2062136.0000,2052757.0000,2057867.0000,2053589.0000,2113482.0000,2050863.0000,2058168.0000,2049350.0000,2097142.0000,2058007.0000,2054912.0000,2061063.0000,2048259.0000,2090348.0000,2051525.0000,2055763.0000,2050543.0000,2058288.0000,2053358.0000,2114985.0000,2049972.0000,2058278.0000,2051705.0000,2101129.0000,2049852.0000,2063096.0000,2052917.0000,1845545.0000,1653931.0000,1673098.0000,1653612.0000,1655665.0000,1658009.0000,1654924.0000,1661876.0000,1652999.0000,1655375.0000,1668389.0000,1656636.0000,1664130.0000,1654993.0000,1672126.0000,1658851.0000,1652990.0000,1963788.0000,2057747.0000,2057787.0000,2053138.0000,2122880.0000,2055552.0000,2055392.0000,2051484.0000,2056243.0000,2110186.0000,2056996.0000,2060252.0000" -generating large command graphs for N nodes - 1,chain 
topology,100,1,13306900,150596.6700,146627.4100,155362.3000,22118.8897,19121.7213,24550.6194,"137719.0000,137548.0000,143610.0000,139682.0000,138541.0000,138090.0000,138570.0000,138361.0000,138390.0000,138541.0000,138620.0000,137960.0000,137959.0000,137870.0000,137849.0000,138621.0000,138230.0000,138150.0000,144331.0000,138710.0000,138069.0000,137599.0000,137990.0000,138220.0000,138220.0000,137659.0000,137498.0000,138089.0000,137990.0000,138150.0000,138100.0000,138180.0000,137518.0000,137729.0000,137668.0000,137739.0000,137889.0000,137559.0000,138490.0000,138301.0000,137819.0000,137979.0000,137598.0000,137970.0000,137849.0000,138160.0000,137629.0000,178657.0000,138581.0000,138240.0000,138019.0000,138270.0000,138260.0000,137729.0000,137448.0000,137860.0000,137268.0000,137879.0000,138160.0000,137309.0000,137659.0000,137719.0000,137659.0000,147257.0000,191140.0000,191311.0000,191090.0000,190760.0000,191050.0000,190990.0000,191010.0000,191070.0000,198615.0000,192152.0000,191331.0000,191371.0000,191170.0000,191240.0000,190899.0000,191310.0000,191160.0000,190248.0000,190559.0000,191050.0000,189387.0000,172164.0000,140144.0000,138260.0000,137168.0000,137599.0000,137448.0000,137579.0000,137399.0000,137248.0000,137448.0000,137459.0000,181652.0000,138501.0000,137549.0000,138149.0000" -generating large command graphs for N nodes - 1,expanding tree 
topology,100,1,18845000,190034.2100,188805.9600,193155.0600,8968.3711,1756.5067,16824.0257,"188355.0000,189076.0000,201209.0000,190399.0000,189838.0000,189287.0000,189487.0000,189387.0000,188906.0000,193094.0000,188315.0000,187734.0000,188966.0000,188315.0000,188756.0000,188495.0000,188155.0000,189307.0000,188234.0000,189356.0000,188816.0000,188626.0000,189377.0000,188655.0000,188625.0000,187854.0000,188445.0000,189047.0000,188996.0000,189377.0000,244732.0000,189256.0000,188435.0000,188705.0000,189718.0000,189387.0000,189417.0000,188776.0000,188575.0000,188495.0000,188265.0000,187974.0000,187583.0000,187674.0000,188445.0000,188425.0000,187984.0000,188004.0000,188325.0000,189257.0000,188545.0000,192603.0000,188175.0000,189076.0000,187463.0000,188466.0000,188014.0000,187984.0000,188335.0000,188065.0000,186952.0000,188485.0000,187524.0000,187433.0000,186361.0000,187854.0000,187664.0000,187854.0000,187814.0000,188004.0000,187925.0000,187834.0000,258027.0000,188906.0000,188274.0000,188114.0000,188105.0000,188465.0000,188836.0000,189066.0000,188355.0000,188125.0000,189126.0000,188796.0000,188836.0000,188535.0000,188946.0000,188164.0000,188135.0000,189306.0000,188164.0000,188836.0000,188235.0000,195178.0000,188816.0000,188375.0000,188705.0000,188084.0000,188696.0000,187864.0000" -generating large command graphs for N nodes - 1,contracting tree 
topology,100,1,23291300,233637.4400,232584.5200,236228.7800,7664.2099,1560.5739,13886.5720,"233130.0000,233430.0000,238681.0000,231977.0000,231046.0000,236677.0000,233100.0000,231556.0000,230344.0000,231186.0000,231727.0000,232239.0000,231186.0000,232468.0000,231136.0000,232279.0000,232578.0000,232248.0000,231256.0000,231456.0000,231055.0000,231427.0000,289627.0000,234122.0000,233150.0000,232608.0000,231576.0000,232168.0000,232789.0000,232558.0000,231397.0000,231897.0000,231296.0000,232358.0000,231928.0000,231066.0000,232468.0000,231808.0000,231957.0000,236687.0000,233450.0000,233120.0000,232318.0000,231596.0000,231857.0000,231366.0000,231838.0000,232448.0000,231778.0000,233159.0000,230965.0000,232178.0000,234162.0000,231677.0000,231417.0000,232859.0000,233491.0000,239502.0000,233140.0000,233180.0000,231737.0000,233030.0000,233129.0000,233360.0000,232478.0000,231887.0000,232509.0000,233300.0000,232739.0000,231727.0000,231517.0000,231747.0000,232709.0000,232258.0000,282603.0000,232899.0000,232098.0000,231667.0000,232879.0000,232930.0000,232799.0000,232488.0000,232999.0000,232008.0000,231897.0000,232930.0000,231807.0000,231898.0000,231997.0000,231887.0000,231867.0000,239132.0000,234362.0000,232929.0000,232278.0000,231176.0000,232568.0000,233750.0000,231807.0000,232819.0000" -generating large command graphs for N nodes - 1,wave_sim 
topology,100,1,116672900,1165126.0700,1162050.0300,1169523.1400,18601.0964,14179.3688,23090.7133,"1158583.0000,1161819.0000,1198689.0000,1165376.0000,1155007.0000,1213136.0000,1156810.0000,1156249.0000,1157591.0000,1164174.0000,1154084.0000,1156890.0000,1198760.0000,1157972.0000,1159245.0000,1158763.0000,1168001.0000,1157461.0000,1158062.0000,1164063.0000,1157842.0000,1158232.0000,1157261.0000,1219268.0000,1156439.0000,1157551.0000,1160867.0000,1158784.0000,1158613.0000,1222634.0000,1161349.0000,1158042.0000,1160217.0000,1164274.0000,1159795.0000,1158583.0000,1162179.0000,1159534.0000,1159184.0000,1157260.0000,1226783.0000,1162350.0000,1159054.0000,1164805.0000,1158643.0000,1155618.0000,1157872.0000,1162440.0000,1154486.0000,1155137.0000,1215942.0000,1157982.0000,1152171.0000,1162531.0000,1153162.0000,1153834.0000,1152912.0000,1199781.0000,1156439.0000,1152772.0000,1160156.0000,1156028.0000,1153303.0000,1154656.0000,1160517.0000,1154766.0000,1155147.0000,1221843.0000,1157732.0000,1156198.0000,1156510.0000,1161628.0000,1155296.0000,1158522.0000,1164143.0000,1158473.0000,1160767.0000,1157841.0000,1215531.0000,1159635.0000,1158874.0000,1162180.0000,1157051.0000,1156199.0000,1161128.0000,1157080.0000,1156549.0000,1157320.0000,1221192.0000,1158814.0000,1157901.0000,1164404.0000,1157561.0000,1157551.0000,1154375.0000,1164465.0000,1158873.0000,1156940.0000,1215151.0000,1156950.0000" -generating large command graphs for N nodes - 1,jacobi 
topology,100,1,40242900,406755.5500,405410.6100,409369.3300,9192.9495,5113.8254,13934.7453,"402009.0000,452295.0000,411858.0000,410296.0000,404945.0000,404214.0000,404445.0000,404534.0000,405336.0000,405195.0000,404675.0000,405446.0000,457755.0000,406378.0000,404044.0000,405216.0000,404153.0000,406478.0000,405246.0000,405296.0000,405125.0000,404564.0000,407681.0000,405646.0000,405205.0000,405105.0000,404485.0000,404595.0000,404825.0000,405496.0000,404064.0000,403212.0000,410356.0000,405176.0000,405386.0000,405146.0000,405166.0000,405706.0000,404083.0000,404805.0000,405206.0000,405186.0000,446434.0000,405997.0000,405116.0000,404845.0000,404425.0000,406718.0000,404094.0000,405867.0000,405847.0000,405487.0000,410375.0000,405536.0000,406017.0000,405036.0000,404364.0000,403783.0000,404114.0000,403202.0000,403312.0000,406337.0000,407309.0000,402491.0000,401719.0000,401810.0000,402851.0000,403342.0000,402170.0000,402400.0000,401910.0000,445732.0000,406468.0000,403552.0000,405386.0000,402170.0000,405526.0000,404815.0000,404294.0000,404925.0000,404344.0000,410937.0000,405536.0000,404825.0000,405025.0000,404926.0000,404164.0000,404454.0000,404004.0000,404274.0000,404825.0000,410025.0000,405156.0000,404324.0000,404345.0000,404314.0000,403703.0000,401909.0000,400857.0000,401799.0000" -generating large command graphs for N nodes - 4,soup 
topology,100,1,247598100,2419506.5700,2380815.6800,2452835.6200,182995.0288,150711.9183,219848.1718,"2460861.0000,2466431.0000,2510715.0000,2466471.0000,2465639.0000,2505816.0000,2461262.0000,2467182.0000,2466672.0000,2679494.0000,2604762.0000,2467703.0000,2459999.0000,2520774.0000,2458476.0000,2468154.0000,2463476.0000,2458105.0000,2509122.0000,2464698.0000,2457724.0000,2465519.0000,2447344.0000,2494213.0000,2453827.0000,2442736.0000,2448868.0000,2996776.0000,2563404.0000,2465649.0000,2462233.0000,2528789.0000,2470338.0000,2463085.0000,2471460.0000,2465058.0000,2516345.0000,2467262.0000,2461101.0000,2501878.0000,2468034.0000,2458255.0000,2533398.0000,2463896.0000,2467873.0000,2527797.0000,2459507.0000,2502750.0000,2464086.0000,2461051.0000,2525733.0000,2459587.0000,2468775.0000,2464267.0000,2457734.0000,2528479.0000,2467753.0000,2461572.0000,2290688.0000,2019965.0000,2033902.0000,2017590.0000,2026718.0000,2017290.0000,2040244.0000,2020035.0000,2028121.0000,2021408.0000,2034974.0000,2021728.0000,2038931.0000,2019474.0000,2023912.0000,2017831.0000,2367754.0000,2469126.0000,2461092.0000,2514101.0000,2455010.0000,2460790.0000,2460179.0000,2461953.0000,2499854.0000,2455630.0000,2462183.0000,2459989.0000,2458837.0000,2522968.0000,2468985.0000,2456502.0000,2462303.0000,2460009.0000,2519031.0000,2464487.0000,2450380.0000,2495877.0000,2444599.0000,2771859.0000,2805313.0000,2459067.0000" -generating large command graphs for N nodes - 4,chain 
topology,100,1,37608300,378250.5000,375265.9900,384215.5500,20656.4143,12334.7824,34345.8435,"372133.0000,373185.0000,388454.0000,373957.0000,373927.0000,373826.0000,373887.0000,374858.0000,377203.0000,374648.0000,373876.0000,373085.0000,373917.0000,373305.0000,374327.0000,374027.0000,372584.0000,373606.0000,371692.0000,379908.0000,372795.0000,372233.0000,373756.0000,370681.0000,371051.0000,463787.0000,516066.0000,459679.0000,371061.0000,406468.0000,371682.0000,370971.0000,372624.0000,373286.0000,373586.0000,374327.0000,373396.0000,373937.0000,373626.0000,373185.0000,376491.0000,373105.0000,372624.0000,372534.0000,373495.0000,373315.0000,374087.0000,374267.0000,372314.0000,372453.0000,433729.0000,374617.0000,374328.0000,373215.0000,372514.0000,373906.0000,372453.0000,373747.0000,372984.0000,373435.0000,372795.0000,378274.0000,371902.0000,373927.0000,372253.0000,373837.0000,371482.0000,371973.0000,371812.0000,372494.0000,371863.0000,372604.0000,376742.0000,373816.0000,371582.0000,372894.0000,373185.0000,373195.0000,373375.0000,374137.0000,373456.0000,372905.0000,435733.0000,373776.0000,372083.0000,373736.0000,372484.0000,373206.0000,372323.0000,372534.0000,373216.0000,373215.0000,371733.0000,378254.0000,374207.0000,373065.0000,372604.0000,372514.0000,372403.0000,373246.0000" -generating large command graphs for N nodes - 4,expanding tree 
topology,100,1,46638600,465948.5200,463983.1000,469482.0900,13069.1606,8227.9876,18846.7954,"464017.0000,469878.0000,475158.0000,463486.0000,462704.0000,463617.0000,526926.0000,462264.0000,461703.0000,461563.0000,464007.0000,462244.0000,463076.0000,463015.0000,463206.0000,468215.0000,460821.0000,462144.0000,461412.0000,461111.0000,460481.0000,460701.0000,463576.0000,512570.0000,460941.0000,462054.0000,459929.0000,462965.0000,460901.0000,462274.0000,461673.0000,462785.0000,469247.0000,462735.0000,460821.0000,461483.0000,461783.0000,462735.0000,460961.0000,463145.0000,462164.0000,469658.0000,465640.0000,466451.0000,463406.0000,462995.0000,460330.0000,463566.0000,461392.0000,523600.0000,463446.0000,464879.0000,462374.0000,463355.0000,461642.0000,461573.0000,462705.0000,465019.0000,467875.0000,462865.0000,462244.0000,462855.0000,461472.0000,461192.0000,461523.0000,462124.0000,468445.0000,461102.0000,462334.0000,462935.0000,462714.0000,463737.0000,465490.0000,460872.0000,462233.0000,517027.0000,464318.0000,464047.0000,462845.0000,462083.0000,461583.0000,461643.0000,462835.0000,463125.0000,464248.0000,461332.0000,461293.0000,462113.0000,461282.0000,462494.0000,462324.0000,463767.0000,528570.0000,462354.0000,462464.0000,463506.0000,461853.0000,462845.0000,461993.0000,462374.0000" -generating large command graphs for N nodes - 4,contracting tree 
topology,100,1,51918600,508960.8400,507212.2500,512139.6600,11660.6017,7386.3612,16736.1148,"506698.0000,506327.0000,515585.0000,507760.0000,509433.0000,506548.0000,506668.0000,507299.0000,515325.0000,507078.0000,505175.0000,506578.0000,506037.0000,506698.0000,505396.0000,506107.0000,565219.0000,506017.0000,503231.0000,506758.0000,505235.0000,505736.0000,505727.0000,504193.0000,512149.0000,503622.0000,505496.0000,504634.0000,504814.0000,504103.0000,504974.0000,506547.0000,558446.0000,506157.0000,505305.0000,506448.0000,506768.0000,507019.0000,505366.0000,514503.0000,505826.0000,505646.0000,507289.0000,506017.0000,506046.0000,505355.0000,505746.0000,512509.0000,504754.0000,504444.0000,506077.0000,507620.0000,506428.0000,505726.0000,503111.0000,549930.0000,505125.0000,505275.0000,505075.0000,507440.0000,504634.0000,507059.0000,504043.0000,511507.0000,505526.0000,506127.0000,507870.0000,504885.0000,504153.0000,503693.0000,504374.0000,562684.0000,505446.0000,505937.0000,504023.0000,505776.0000,505165.0000,504224.0000,504444.0000,511006.0000,503823.0000,505466.0000,505175.0000,507089.0000,506387.0000,505817.0000,505215.0000,511758.0000,506417.0000,507259.0000,506518.0000,507149.0000,506938.0000,503853.0000,556262.0000,507159.0000,506568.0000,506287.0000,505506.0000,504244.0000" -generating large command graphs for N nodes - 4,wave_sim 
topology,100,1,238498300,2328314.1000,2299719.3900,2355970.7000,143627.3630,117072.0495,191125.7217,"2352375.0000,2363907.0000,2071613.0000,2037198.0000,2028771.0000,2035704.0000,2025765.0000,2041385.0000,2023893.0000,2028021.0000,2026317.0000,2036506.0000,2026827.0000,2033230.0000,2027208.0000,2028831.0000,2020666.0000,2135694.0000,2371051.0000,2366953.0000,2357675.0000,2408611.0000,2371461.0000,2363827.0000,2413871.0000,2354769.0000,2362594.0000,2368265.0000,2361411.0000,2409623.0000,2357394.0000,2368115.0000,2358677.0000,2358887.0000,2428028.0000,2362034.0000,2368766.0000,2368856.0000,2354809.0000,2422547.0000,2356884.0000,2365009.0000,2363896.0000,2408632.0000,2370198.0000,2358496.0000,2364447.0000,2362514.0000,2420042.0000,2369256.0000,2355200.0000,2359408.0000,2357003.0000,2413461.0000,2364818.0000,2353637.0000,2398702.0000,2359809.0000,2365770.0000,2366852.0000,2352765.0000,2423249.0000,2360541.0000,2361692.0000,2404354.0000,2359127.0000,2366071.0000,2353577.0000,2362655.0000,2418850.0000,2360670.0000,2367754.0000,2357023.0000,2364778.0000,2352084.0000,2413892.0000,2362093.0000,2976006.0000,2627817.0000,2419231.0000,2355731.0000,2373014.0000,2355661.0000,2402850.0000,2361853.0000,2355701.0000,2399214.0000,2363766.0000,2367574.0000,2408221.0000,2359077.0000,2363015.0000,2355200.0000,2400326.0000,2363005.0000,2359057.0000,2360229.0000,2358226.0000,2406407.0000,2362885.0000" -generating large command graphs for N nodes - 4,jacobi 
topology,100,1,91024600,832185.3300,820621.2400,844412.6900,60791.5230,57226.8424,62751.2472,"781128.0000,782350.0000,919340.0000,910483.0000,906886.0000,904491.0000,905264.0000,907307.0000,913258.0000,906376.0000,906555.0000,907006.0000,909952.0000,905023.0000,905103.0000,905483.0000,915081.0000,909421.0000,908579.0000,909492.0000,906656.0000,907988.0000,904542.0000,909361.0000,908620.0000,915913.0000,906105.0000,910393.0000,907818.0000,908288.0000,907467.0000,901155.0000,898139.0000,900244.0000,905875.0000,899903.0000,903330.0000,903009.0000,907207.0000,904121.0000,821876.0000,777912.0000,781749.0000,791989.0000,781218.0000,777391.0000,781038.0000,779495.0000,792880.0000,784044.0000,780467.0000,779274.0000,781269.0000,783532.0000,786158.0000,787129.0000,780526.0000,781348.0000,781879.0000,788983.0000,783222.0000,782240.0000,779245.0000,781038.0000,787420.0000,782491.0000,781669.0000,782070.0000,778974.0000,788452.0000,782451.0000,780537.0000,777010.0000,779856.0000,783853.0000,781078.0000,780998.0000,781719.0000,780637.0000,842414.0000,922415.0000,785547.0000,784775.0000,783031.0000,787600.0000,782140.0000,781749.0000,780116.0000,777832.0000,785757.0000,778954.0000,780437.0000,781800.0000,779204.0000,789974.0000,781208.0000,780497.0000,780767.0000,776209.0000,786278.0000" -generating large command graphs for N nodes - 16,soup 
topology,100,1,298928700,2978129.1200,2926224.9600,3023925.8700,248980.4925,225292.2070,265557.9938,"3149275.0000,3143965.0000,3161018.0000,3152901.0000,3151969.0000,3148944.0000,3152160.0000,3147852.0000,3154696.0000,3150727.0000,3149204.0000,3156047.0000,3142822.0000,3148774.0000,3144015.0000,3143865.0000,3148312.0000,3148062.0000,3150066.0000,3151589.0000,3147090.0000,3141049.0000,3147882.0000,3149995.0000,3155867.0000,2799301.0000,2597289.0000,2598301.0000,2592450.0000,2599673.0000,2601667.0000,2596407.0000,2595275.0000,2597019.0000,2600434.0000,2598300.0000,2705584.0000,3147421.0000,3150056.0000,3153011.0000,3151479.0000,3148834.0000,3144906.0000,3145088.0000,3143864.0000,3145157.0000,3162670.0000,3140949.0000,3154475.0000,3152320.0000,3149826.0000,3143393.0000,3149515.0000,3138574.0000,3142812.0000,3149104.0000,3143174.0000,3148103.0000,3147531.0000,3145568.0000,3210590.0000,3133594.0000,3145036.0000,3149996.0000,3148263.0000,2884553.0000,2604973.0000,2595886.0000,2590726.0000,2628478.0000,2604022.0000,2593231.0000,2599943.0000,2709491.0000,3159935.0000,3160035.0000,3157250.0000,2676769.0000,2583293.0000,2595446.0000,2597719.0000,2591989.0000,2603270.0000,2596458.0000,2591918.0000,2598171.0000,2593462.0000,2574456.0000,2865327.0000,3156338.0000,3146699.0000,3146499.0000,3152060.0000,3151419.0000,3151699.0000,3140688.0000,3150367.0000,3153492.0000,3152550.0000,3149145.0000" -generating large command graphs for N nodes - 16,chain 
topology,100,1,122669200,1220057.9800,1203579.0700,1235245.1500,80687.7477,72115.0472,93561.0216,"1263582.0000,1262139.0000,1270024.0000,1258101.0000,1328394.0000,1260826.0000,1258583.0000,1265746.0000,1262801.0000,1258523.0000,1259044.0000,1267660.0000,1259875.0000,1257771.0000,1264884.0000,1261699.0000,1261007.0000,1266097.0000,1257070.0000,1258933.0000,1265326.0000,1262810.0000,1262150.0000,1266037.0000,1259294.0000,1261238.0000,1265536.0000,1258422.0000,1259124.0000,1258592.0000,1467849.0000,1258582.0000,1258202.0000,1266828.0000,1262109.0000,1263091.0000,1269243.0000,1261308.0000,1261588.0000,1270195.0000,1259384.0000,1258492.0000,1264965.0000,1260236.0000,1261869.0000,1266729.0000,1259213.0000,1257320.0000,1267329.0000,1266207.0000,1263061.0000,1260697.0000,1267539.0000,1261928.0000,1261417.0000,1379882.0000,1257270.0000,1258853.0000,1264874.0000,1260968.0000,1259985.0000,1266999.0000,1260977.0000,1260657.0000,1263742.0000,1257701.0000,1261137.0000,1281906.0000,1262851.0000,1093961.0000,1092297.0000,1098108.0000,1093540.0000,1093410.0000,1100333.0000,1093129.0000,1094582.0000,1093931.0000,1099191.0000,1093309.0000,1092108.0000,1091606.0000,1102417.0000,1094932.0000,1094362.0000,1100192.0000,1092909.0000,1093921.0000,1092327.0000,1101285.0000,1094321.0000,1093320.0000,1093580.0000,1104461.0000,1094291.0000,1094813.0000,1137203.0000,1263702.0000,1261157.0000,1272629.0000" -generating large command graphs for N nodes - 16,expanding tree 
topology,100,1,112474900,1166405.7200,1165129.1000,1169126.4700,9095.4604,4921.6912,17303.3906,"1167149.0000,1165687.0000,1204740.0000,1171708.0000,1165015.0000,1165245.0000,1169674.0000,1164694.0000,1163443.0000,1162871.0000,1161689.0000,1164895.0000,1163412.0000,1169744.0000,1166037.0000,1163222.0000,1173151.0000,1165677.0000,1163342.0000,1159385.0000,1171287.0000,1159855.0000,1162140.0000,1172870.0000,1165016.0000,1162771.0000,1166568.0000,1182939.0000,1162931.0000,1163092.0000,1172459.0000,1164474.0000,1165586.0000,1164073.0000,1169043.0000,1160737.0000,1163683.0000,1170817.0000,1160867.0000,1162751.0000,1171959.0000,1162060.0000,1163382.0000,1165236.0000,1172008.0000,1165857.0000,1163833.0000,1173001.0000,1167019.0000,1165757.0000,1162901.0000,1166539.0000,1165797.0000,1167570.0000,1171458.0000,1165506.0000,1164504.0000,1164674.0000,1168903.0000,1164965.0000,1163663.0000,1167099.0000,1167409.0000,1158303.0000,1160727.0000,1153955.0000,1155166.0000,1151801.0000,1164534.0000,1158794.0000,1161269.0000,1170696.0000,1164204.0000,1163081.0000,1161980.0000,1235088.0000,1163723.0000,1165426.0000,1167540.0000,1164625.0000,1162100.0000,1164324.0000,1167370.0000,1162731.0000,1165897.0000,1168802.0000,1160997.0000,1162831.0000,1172730.0000,1167881.0000,1164885.0000,1161088.0000,1170556.0000,1162621.0000,1161348.0000,1170055.0000,1164605.0000,1161538.0000,1167790.0000,1173672.0000" -generating large command graphs for N nodes - 16,contracting tree 
topology,100,1,113824900,1158620.2200,1138990.4400,1177993.1900,99810.7254,97347.8920,101913.2143,"1051430.0000,1056921.0000,1282137.0000,1255677.0000,1257371.0000,1252821.0000,1250657.0000,1247392.0000,1254474.0000,1251449.0000,1249174.0000,1257039.0000,1142212.0000,1051962.0000,1056419.0000,1051791.0000,1049066.0000,1052342.0000,1056831.0000,1052101.0000,1049787.0000,1051650.0000,1060597.0000,1051971.0000,1045829.0000,1045189.0000,1053173.0000,1043916.0000,1046391.0000,1047833.0000,1049947.0000,1046802.0000,1047432.0000,1050679.0000,1046080.0000,1042864.0000,1046611.0000,1056870.0000,1042203.0000,1047663.0000,1043485.0000,1121553.0000,1244376.0000,1248123.0000,1259765.0000,1249385.0000,1250708.0000,1248353.0000,1254825.0000,1252360.0000,1249245.0000,1253063.0000,1249886.0000,1247582.0000,1260466.0000,1253893.0000,1253433.0000,1261368.0000,1256268.0000,1258583.0000,1262470.0000,1252261.0000,1253994.0000,1270715.0000,1256920.0000,1252431.0000,1249786.0000,1253192.0000,1251579.0000,1252731.0000,1256028.0000,1247943.0000,1246600.0000,1258453.0000,1250467.0000,1249515.0000,1251959.0000,1247371.0000,1248674.0000,1254334.0000,1250698.0000,1251850.0000,1249846.0000,1256348.0000,1116162.0000,1053604.0000,1061228.0000,1050779.0000,1052832.0000,1050638.0000,1058954.0000,1053193.0000,1053484.0000,1052862.0000,1056920.0000,1049627.0000,1051069.0000,1049908.0000,1057161.0000,1047963.0000" -generating large command graphs for N nodes - 16,wave_sim 
topology,100,1,441401700,4438359.0300,4370537.1500,4496630.4700,321458.2096,282112.4536,351585.4431,"4640580.0000,4615312.0000,3942728.0000,3878536.0000,3857837.0000,3861344.0000,3848349.0000,3871182.0000,3854731.0000,4368625.0000,4636783.0000,4645549.0000,4622786.0000,4629309.0000,4644718.0000,4636653.0000,4635600.0000,4638495.0000,4645058.0000,4633477.0000,4639568.0000,4634167.0000,4627515.0000,4640249.0000,4642584.0000,4627314.0000,4627986.0000,4634619.0000,4619751.0000,4122869.0000,3906649.0000,3894366.0000,4442615.0000,4668013.0000,4636091.0000,4619549.0000,4619830.0000,4631723.0000,4639459.0000,4628547.0000,4654376.0000,4654826.0000,4667622.0000,4644778.0000,4646121.0000,4629459.0000,4642393.0000,4627917.0000,4637254.0000,4623086.0000,4631532.0000,4640431.0000,4643856.0000,4646521.0000,4649156.0000,4401036.0000,3884728.0000,3897111.0000,3859560.0000,3874569.0000,3869779.0000,3886471.0000,3884779.0000,4070720.0000,4627956.0000,4636562.0000,4637965.0000,4632465.0000,4643235.0000,4647012.0000,4639368.0000,4638897.0000,4626724.0000,4628627.0000,4644327.0000,4634158.0000,4559597.0000,3885529.0000,3898765.0000,3882213.0000,3887744.0000,3897683.0000,3887483.0000,3899396.0000,3981601.0000,4632896.0000,4635421.0000,4623347.0000,4692679.0000,4626573.0000,4670387.0000,4637894.0000,4625652.0000,4620772.0000,4630210.0000,4636162.0000,4642494.0000,4624519.0000,4612557.0000,4627836.0000" -generating large command graphs for N nodes - 16,jacobi 
topology,100,1,247496700,2514141.4700,2477077.1000,2545318.5800,172131.4455,149880.9353,189252.0339,"2604984.0000,2609852.0000,2618349.0000,2602018.0000,2608380.0000,2611155.0000,2608480.0000,2614812.0000,2619641.0000,2617678.0000,2627836.0000,2624450.0000,2612989.0000,2616014.0000,2617618.0000,2602659.0000,2611727.0000,2619180.0000,2608090.0000,2625672.0000,2627697.0000,2615032.0000,2627316.0000,2618389.0000,2616906.0000,2627837.0000,2633217.0000,2615273.0000,2627205.0000,2626293.0000,2621515.0000,2620173.0000,2622607.0000,2623078.0000,2630161.0000,2614532.0000,2627155.0000,2622958.0000,2614311.0000,2419382.0000,2231796.0000,2210075.0000,2211958.0000,2202851.0000,2206929.0000,2198623.0000,2196429.0000,2198363.0000,2199265.0000,2252827.0000,2606817.0000,2601236.0000,2594584.0000,2599112.0000,2619331.0000,2609612.0000,2617688.0000,2620763.0000,2606396.0000,2619992.0000,2624621.0000,2613880.0000,2614903.0000,2625923.0000,2607659.0000,2617798.0000,2623759.0000,2612177.0000,2621184.0000,2616835.0000,2616646.0000,2614692.0000,2417608.0000,2212470.0000,2224663.0000,2204555.0000,2208512.0000,2203273.0000,2215896.0000,2231115.0000,2234592.0000,2229452.0000,2210837.0000,2212319.0000,2196680.0000,2216217.0000,2204134.0000,2487672.0000,2627115.0000,2621575.0000,2612448.0000,2603441.0000,2604603.0000,2603681.0000,2617958.0000,2612808.0000,2614913.0000,2607448.0000,2611035.0000,2619782.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,soup 
topology,100,1,200578300,2027151.5300,1993235.0400,2053168.6500,151484.4929,122504.4179,177732.0360,"2097783.0000,2100117.0000,2115336.0000,2096230.0000,2101379.0000,2097051.0000,2104455.0000,2092954.0000,2103864.0000,2100497.0000,2093524.0000,2098804.0000,2094757.0000,2098033.0000,2098975.0000,2097853.0000,2090178.0000,2103694.0000,1971152.0000,1682306.0000,1669791.0000,1675142.0000,1678519.0000,1672407.0000,1681674.0000,1667668.0000,1668509.0000,1662027.0000,1660504.0000,1956585.0000,2079889.0000,2089567.0000,2086521.0000,2086041.0000,2084087.0000,2089967.0000,2102932.0000,2098634.0000,2098554.0000,2096861.0000,2098975.0000,2098083.0000,2101259.0000,2089197.0000,2101720.0000,2093194.0000,2100928.0000,1932149.0000,1688668.0000,1671665.0000,1673248.0000,1675973.0000,1674049.0000,1938581.0000,2092382.0000,2088034.0000,2082814.0000,2088755.0000,2087954.0000,2088886.0000,2081962.0000,2085119.0000,2091070.0000,2087874.0000,2103133.0000,2097241.0000,2101499.0000,2095258.0000,2098464.0000,2090689.0000,2107210.0000,2092172.0000,2100097.0000,2094036.0000,2106098.0000,2093445.0000,2099195.0000,2095708.0000,2101740.0000,2093044.0000,2100337.0000,2096350.0000,2105467.0000,2102411.0000,2097612.0000,2099917.0000,2095969.0000,2108503.0000,2093314.0000,2106038.0000,2093084.0000,2097171.0000,2096140.0000,2100648.0000,2093946.0000,2100918.0000,2096711.0000,2101069.0000,2100858.0000,2102301.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,chain 
topology,100,1,13599000,118540.3300,118351.2400,118908.2400,1295.0131,692.5089,2013.4466,"118131.0000,118192.0000,126248.0000,118953.0000,118753.0000,118082.0000,118423.0000,117951.0000,118022.0000,118312.0000,118422.0000,118703.0000,118132.0000,118122.0000,118122.0000,118202.0000,117531.0000,118462.0000,123482.0000,118773.0000,118623.0000,117961.0000,118453.0000,118162.0000,118663.0000,118072.0000,118302.0000,118282.0000,118352.0000,118111.0000,118433.0000,117851.0000,118263.0000,118562.0000,117922.0000,118362.0000,118152.0000,117992.0000,118232.0000,118122.0000,118212.0000,118292.0000,118081.0000,117992.0000,118523.0000,118542.0000,118262.0000,118182.0000,118222.0000,118022.0000,118091.0000,118262.0000,124644.0000,118603.0000,118292.0000,118373.0000,117961.0000,118202.0000,118182.0000,118041.0000,118172.0000,118432.0000,118172.0000,118403.0000,118162.0000,118382.0000,118382.0000,118182.0000,118071.0000,118233.0000,117791.0000,118042.0000,117951.0000,118192.0000,118632.0000,118363.0000,118292.0000,118312.0000,118082.0000,118162.0000,118834.0000,118452.0000,118763.0000,118363.0000,118462.0000,118472.0000,124394.0000,118673.0000,118312.0000,118713.0000,118293.0000,117861.0000,118182.0000,118202.0000,118713.0000,118463.0000,118292.0000,118623.0000,118232.0000,118453.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,18783100,178499.4200,175710.5300,181240.9800,14018.4063,13649.2344,14499.2778,"191851.0000,192112.0000,200047.0000,192413.0000,191821.0000,190419.0000,191591.0000,191801.0000,192132.0000,192683.0000,192163.0000,192212.0000,198344.0000,192202.0000,191611.0000,191270.0000,192583.0000,191371.0000,192573.0000,191040.0000,192363.0000,191170.0000,191501.0000,193034.0000,192543.0000,191050.0000,191341.0000,192302.0000,191561.0000,190629.0000,191681.0000,190890.0000,187904.0000,175199.0000,165311.0000,163828.0000,164990.0000,164350.0000,164419.0000,164069.0000,163278.0000,165031.0000,163467.0000,163087.0000,163538.0000,179558.0000,164079.0000,163027.0000,163859.0000,164289.0000,164380.0000,162956.0000,162405.0000,163528.0000,163789.0000,163077.0000,163317.0000,181392.0000,163909.0000,162546.0000,162356.0000,162726.0000,162466.0000,163037.0000,162987.0000,163298.0000,163858.0000,164460.0000,163127.0000,163869.0000,162977.0000,163768.0000,163107.0000,163198.0000,163568.0000,164039.0000,163878.0000,163678.0000,162896.0000,163708.0000,163698.0000,163397.0000,194286.0000,192743.0000,192062.0000,190549.0000,191391.0000,191501.0000,191250.0000,191781.0000,191912.0000,191050.0000,191160.0000,191080.0000,191341.0000,190489.0000,191782.0000,191621.0000,191571.0000,191391.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,22407900,237058.4900,236748.5100,237540.5100,1943.9459,1399.1783,2779.0912,"236436.0000,237067.0000,247357.0000,236205.0000,237428.0000,235975.0000,236516.0000,236436.0000,244100.0000,238500.0000,236796.0000,238350.0000,236606.0000,236836.0000,237608.0000,237237.0000,236186.0000,237829.0000,236897.0000,236586.0000,236887.0000,237238.0000,236386.0000,235504.0000,237578.0000,242657.0000,236687.0000,238009.0000,235685.0000,236175.0000,236026.0000,236877.0000,237147.0000,238089.0000,236306.0000,238019.0000,235124.0000,237919.0000,237017.0000,236256.0000,237117.0000,237178.0000,241275.0000,236857.0000,236326.0000,235755.0000,236386.0000,236826.0000,236446.0000,235153.0000,235795.0000,234873.0000,236537.0000,236225.0000,235014.0000,236115.0000,236627.0000,235574.0000,235775.0000,241486.0000,235444.0000,236366.0000,236266.0000,235795.0000,236546.0000,235835.0000,235294.0000,237708.0000,235684.0000,235965.0000,236086.0000,236075.0000,236697.0000,235474.0000,237087.0000,235834.0000,243961.0000,237137.0000,236757.0000,235615.0000,236616.0000,236296.0000,236316.0000,236857.0000,237288.0000,236786.0000,236416.0000,235023.0000,235895.0000,237208.0000,237849.0000,236937.0000,236676.0000,242016.0000,236717.0000,236947.0000,236636.0000,238440.0000,237829.0000,237598.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,112368000,1153820.8400,1139187.0400,1164316.6700,62733.5303,48609.1963,75905.1836,"1178641.0000,1180044.0000,1209539.0000,1182568.0000,1175275.0000,1181868.0000,1177629.0000,1178972.0000,1179282.0000,1175115.0000,1174783.0000,1176156.0000,1172660.0000,1173110.0000,1174403.0000,1180584.0000,1178270.0000,1178561.0000,1018237.0000,981488.0000,998180.0000,985385.0000,1013368.0000,986497.0000,983562.0000,983080.0000,990144.0000,982820.0000,985465.0000,986056.0000,1009541.0000,1175004.0000,1168772.0000,1171838.0000,1184732.0000,1168541.0000,1172389.0000,1177308.0000,1166718.0000,1169084.0000,1171357.0000,1175235.0000,1173291.0000,1165767.0000,1171658.0000,1168261.0000,1169384.0000,1175084.0000,1169103.0000,1170135.0000,1177088.0000,1183901.0000,1180464.0000,1178632.0000,1188179.0000,1183851.0000,1182268.0000,1176787.0000,1188229.0000,1178611.0000,1180165.0000,1182929.0000,1177850.0000,1176898.0000,1186686.0000,1179483.0000,1178230.0000,1179343.0000,1181346.0000,1174213.0000,1174393.0000,1179362.0000,1175104.0000,1171467.0000,1178270.0000,1184252.0000,1176507.0000,1174804.0000,1183620.0000,1178331.0000,1175124.0000,1180214.0000,1177339.0000,1179122.0000,1173291.0000,1185905.0000,1176978.0000,1178491.0000,1185644.0000,1177068.0000,1177579.0000,1178641.0000,1182539.0000,1180645.0000,1182869.0000,1187248.0000,1180705.0000,1178331.0000,1184492.0000,1173652.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,41319600,411122.7600,407683.8800,413830.0200,15479.9358,10284.1029,22009.5902,"351944.0000,360952.0000,423610.0000,418611.0000,414072.0000,415536.0000,408962.0000,409143.0000,411027.0000,411818.0000,412800.0000,412108.0000,410676.0000,415786.0000,412079.0000,409283.0000,409835.0000,412169.0000,412209.0000,410596.0000,413241.0000,411347.0000,409995.0000,418792.0000,412981.0000,413421.0000,413812.0000,412690.0000,411408.0000,411347.0000,410927.0000,411598.0000,418261.0000,411888.0000,413432.0000,411507.0000,413942.0000,413211.0000,415044.0000,414333.0000,413601.0000,413592.0000,419553.0000,413271.0000,412550.0000,413121.0000,413181.0000,413061.0000,412479.0000,412520.0000,411879.0000,412940.0000,485368.0000,415996.0000,415065.0000,413922.0000,414564.0000,413772.0000,415395.0000,414163.0000,413401.0000,419443.0000,416267.0000,415405.0000,413933.0000,414563.0000,414854.0000,414082.0000,414753.0000,413702.0000,414213.0000,420865.0000,414103.0000,413431.0000,415034.0000,412650.0000,412850.0000,414163.0000,413572.0000,413632.0000,413982.0000,419152.0000,413371.0000,413351.0000,413201.0000,413792.0000,413191.0000,412019.0000,412690.0000,414684.0000,417870.0000,414163.0000,413872.0000,413031.0000,413551.0000,410045.0000,353719.0000,350873.0000,352135.0000,382282.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,soup 
topology,100,1,173087600,1905307.8600,1882159.7800,1919161.3500,89596.9694,59207.4122,129731.2242,"1913864.0000,1914765.0000,1923813.0000,1943360.0000,1927880.0000,1689048.0000,1697704.0000,1459674.0000,1487245.0000,1504358.0000,1660694.0000,1886442.0000,1919475.0000,1916368.0000,1940053.0000,1857026.0000,1954802.0000,1935746.0000,1973487.0000,1913283.0000,1950643.0000,1945304.0000,1917000.0000,1982945.0000,1946766.0000,1919524.0000,1936807.0000,1920977.0000,1878767.0000,1924865.0000,1943900.0000,1914565.0000,1884699.0000,1951786.0000,1850884.0000,1923201.0000,1903925.0000,1927099.0000,1938029.0000,1959039.0000,1933030.0000,1888476.0000,1945865.0000,1920317.0000,1932800.0000,1920908.0000,1942388.0000,1920307.0000,1893125.0000,1939302.0000,1996370.0000,1918303.0000,2005337.0000,1946085.0000,1938280.0000,1956385.0000,1917611.0000,1888075.0000,1958358.0000,1944181.0000,1936006.0000,1972305.0000,1946516.0000,1940905.0000,1951936.0000,1907863.0000,1914355.0000,1893886.0000,1927330.0000,1917611.0000,1973597.0000,1905368.0000,1876373.0000,1920948.0000,1924815.0000,1907252.0000,1923011.0000,1884438.0000,1886352.0000,1885089.0000,1883918.0000,1954240.0000,1912882.0000,1946947.0000,1887785.0000,1919726.0000,1933562.0000,1893886.0000,1939903.0000,1978917.0000,1913624.0000,1920627.0000,1912502.0000,1930395.0000,1981271.0000,1932078.0000,1935295.0000,1936397.0000,1910919.0000,1864641.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,chain 
topology,100,1,30861000,359442.0800,352287.7900,366323.9300,35781.4542,32435.8078,39614.6721,"375128.0000,378244.0000,394976.0000,398783.0000,397281.0000,401208.0000,396910.0000,399124.0000,399425.0000,398894.0000,399545.0000,398212.0000,398062.0000,372203.0000,400297.0000,400256.0000,371151.0000,374086.0000,423610.0000,397992.0000,374508.0000,420996.0000,401379.0000,399375.0000,400247.0000,399275.0000,315486.0000,291911.0000,346164.0000,319684.0000,292332.0000,317991.0000,319073.0000,320335.0000,317610.0000,293504.0000,293554.0000,317921.0000,293304.0000,317991.0000,320014.0000,319373.0000,290699.0000,346995.0000,318131.0000,319703.0000,319162.0000,294496.0000,318442.0000,292863.0000,291440.0000,319563.0000,346364.0000,345734.0000,386290.0000,348008.0000,375870.0000,350151.0000,376482.0000,405486.0000,349410.0000,377633.0000,376531.0000,376912.0000,406418.0000,375810.0000,349040.0000,348238.0000,408442.0000,403623.0000,376521.0000,348599.0000,348378.0000,378004.0000,347637.0000,348468.0000,347747.0000,406718.0000,376531.0000,347357.0000,321647.0000,403693.0000,348208.0000,346344.0000,351554.0000,347507.0000,348729.0000,346886.0000,407260.0000,375700.0000,378065.0000,348138.0000,346144.0000,378485.0000,377053.0000,348589.0000,376011.0000,349911.0000,384115.0000,348859.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,42668300,404205.1500,394817.9800,413527.5100,47601.6294,43681.0542,51965.4280,"399445.0000,394686.0000,463556.0000,440282.0000,449660.0000,473295.0000,472112.0000,467173.0000,443498.0000,444369.0000,456202.0000,451504.0000,451043.0000,450231.0000,435543.0000,452155.0000,429051.0000,457455.0000,436936.0000,429803.0000,455421.0000,387933.0000,355863.0000,352516.0000,333340.0000,350603.0000,345953.0000,341325.0000,341666.0000,333460.0000,332608.0000,330845.0000,342437.0000,331716.0000,327890.0000,340833.0000,333881.0000,329211.0000,339581.0000,353808.0000,343269.0000,340233.0000,340243.0000,345122.0000,341525.0000,336195.0000,337557.0000,402361.0000,398062.0000,395998.0000,372765.0000,397742.0000,375479.0000,401829.0000,367093.0000,378335.0000,371011.0000,423631.0000,397942.0000,401339.0000,371813.0000,398663.0000,396168.0000,398282.0000,374658.0000,398383.0000,376351.0000,431485.0000,467544.0000,461933.0000,466401.0000,463997.0000,491409.0000,466501.0000,464027.0000,460611.0000,498111.0000,461763.0000,436295.0000,460170.0000,464187.0000,466091.0000,461242.0000,469287.0000,463406.0000,418651.0000,383856.0000,406949.0000,407099.0000,400327.0000,396249.0000,414634.0000,396579.0000,386760.0000,401960.0000,400127.0000,395196.0000,407290.0000,408402.0000,401038.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,41127400,345736.9400,337164.3600,354732.9100,45127.5311,40863.1305,49802.8792,"296099.0000,289998.0000,445853.0000,442857.0000,415134.0000,423611.0000,412369.0000,413722.0000,400898.0000,416928.0000,405666.0000,401319.0000,410776.0000,400526.0000,417689.0000,410566.0000,409634.0000,411498.0000,407310.0000,412109.0000,414784.0000,414293.0000,401779.0000,412409.0000,418761.0000,368205.0000,330805.0000,339040.0000,338800.0000,341395.0000,338539.0000,337858.0000,348118.0000,358257.0000,339502.0000,330203.0000,341415.0000,330394.0000,351133.0000,334341.0000,339602.0000,338108.0000,329392.0000,352536.0000,388604.0000,346224.0000,350452.0000,338750.0000,337407.0000,350232.0000,363056.0000,357044.0000,343669.0000,363787.0000,368867.0000,368457.0000,354259.0000,353207.0000,366042.0000,344661.0000,348037.0000,343078.0000,348338.0000,343448.0000,345893.0000,341465.0000,346104.0000,383295.0000,298323.0000,301108.0000,290028.0000,297532.0000,319473.0000,298494.0000,292252.0000,292482.0000,291931.0000,288895.0000,304345.0000,298393.0000,311248.0000,292112.0000,290168.0000,283245.0000,302441.0000,285308.0000,285869.0000,299205.0000,291160.0000,293263.0000,315146.0000,298123.0000,290298.0000,292633.0000,298754.0000,295748.0000,287172.0000,314403.0000,294816.0000,290719.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,120774500,1249644.7300,1234499.6900,1263825.4000,75007.3821,69782.4788,79530.5642,"1353151.0000,1296645.0000,1144767.0000,1145699.0000,1152462.0000,1153664.0000,1160637.0000,1128145.0000,1143765.0000,1144216.0000,1161068.0000,1254966.0000,1280013.0000,1309479.0000,1304339.0000,1296424.0000,1319819.0000,1277649.0000,1333245.0000,1307976.0000,1332904.0000,1305271.0000,1337051.0000,1298598.0000,1284902.0000,1291274.0000,1287768.0000,1339065.0000,1338604.0000,1290482.0000,1299851.0000,1291184.0000,1294139.0000,1293599.0000,1315110.0000,1333885.0000,1357380.0000,1295833.0000,1317995.0000,1293319.0000,1307105.0000,1282999.0000,1309669.0000,1278460.0000,1291245.0000,1295733.0000,1290974.0000,1332162.0000,1309850.0000,1313798.0000,1315330.0000,1302406.0000,1318857.0000,1290092.0000,1296624.0000,1284922.0000,1317945.0000,1224017.0000,1150718.0000,1159455.0000,1161749.0000,1160006.0000,1144306.0000,1137673.0000,1162510.0000,1150708.0000,1159375.0000,1136010.0000,1169384.0000,1164555.0000,1147773.0000,1164885.0000,1152401.0000,1154245.0000,1148584.0000,1144196.0000,1156950.0000,1138906.0000,1157541.0000,1164915.0000,1143815.0000,1178411.0000,1149115.0000,1150818.0000,1162962.0000,1283490.0000,1316513.0000,1289662.0000,1296004.0000,1337112.0000,1328755.0000,1309158.0000,1291625.0000,1294400.0000,1302465.0000,1294901.0000,1276697.0000,1345236.0000,1307475.0000,1290483.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,jacobi 
topology,100,1,55544400,554800.0700,546950.6300,565815.7200,46903.9225,34949.2255,77952.8180,"554208.0000,580978.0000,604995.0000,640331.0000,581229.0000,580739.0000,578454.0000,580408.0000,580307.0000,552305.0000,609573.0000,579005.0000,582452.0000,578574.0000,578995.0000,582963.0000,579857.0000,622147.0000,585608.0000,582281.0000,544559.0000,587040.0000,587501.0000,607950.0000,574667.0000,851281.0000,484396.0000,490007.0000,493323.0000,504194.0000,493684.0000,537867.0000,487723.0000,489055.0000,494465.0000,495236.0000,495988.0000,497821.0000,503051.0000,510816.0000,496629.0000,493403.0000,483925.0000,501959.0000,487561.0000,487030.0000,492431.0000,543688.0000,485097.0000,532848.0000,559729.0000,557084.0000,561732.0000,558085.0000,536655.0000,539590.0000,536234.0000,584255.0000,559528.0000,559087.0000,555972.0000,566051.0000,554208.0000,584465.0000,584165.0000,530483.0000,532598.0000,534320.0000,611977.0000,564878.0000,552043.0000,551803.0000,551202.0000,550191.0000,548347.0000,549268.0000,578554.0000,560841.0000,578635.0000,581971.0000,549048.0000,553917.0000,550460.0000,609032.0000,580758.0000,551072.0000,552014.0000,552404.0000,582231.0000,547516.0000,554799.0000,546674.0000,554468.0000,580497.0000,551984.0000,550010.0000,552534.0000,609753.0000,576491.0000,549790.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,303391900,2994286.4400,2956194.4300,3027040.1200,180038.8582,156877.8045,198661.7726,"2686057.0000,2670518.0000,3116562.0000,3096144.0000,3092998.0000,3098469.0000,3092877.0000,3094661.0000,3093438.0000,3122164.0000,2678032.0000,2663785.0000,2669365.0000,2795293.0000,3087026.0000,3093249.0000,3089842.0000,3102456.0000,3145387.0000,2664917.0000,2667502.0000,2715202.0000,2664155.0000,2677361.0000,2741964.0000,3096735.0000,3101965.0000,3101815.0000,3127232.0000,3104790.0000,3093690.0000,3104379.0000,3100592.0000,3100542.0000,3093760.0000,3101203.0000,2896315.0000,2673172.0000,2663004.0000,2946421.0000,3101283.0000,3093930.0000,3098298.0000,3108067.0000,3106303.0000,3057391.0000,2663164.0000,2664566.0000,2671039.0000,2662061.0000,2973702.0000,3100452.0000,3102316.0000,3096575.0000,3094771.0000,3098268.0000,3099030.0000,3100021.0000,3102005.0000,3106533.0000,3099110.0000,3102576.0000,3091926.0000,3100122.0000,3097276.0000,3103138.0000,3094801.0000,3099571.0000,3142772.0000,3102706.0000,3101044.0000,3092367.0000,3095293.0000,3100361.0000,3099640.0000,3098388.0000,3101294.0000,3098869.0000,3165646.0000,3190403.0000,2949586.0000,2664176.0000,2731573.0000,3094812.0000,3101273.0000,3097688.0000,3102095.0000,3093649.0000,3091645.0000,3096034.0000,3095723.0000,3105411.0000,3092207.0000,3102335.0000,3100482.0000,3042392.0000,2663764.0000,2668613.0000,2669645.0000,2663424.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,42831000,419183.2100,416939.8400,422921.2400,14442.1842,10319.4302,25587.6631,"411738.0000,410827.0000,440683.0000,433710.0000,433179.0000,439531.0000,433519.0000,432938.0000,433479.0000,433629.0000,432798.0000,433279.0000,433619.0000,433480.0000,433860.0000,432417.0000,432388.0000,432808.0000,433800.0000,433610.0000,432047.0000,432297.0000,431606.0000,434902.0000,433990.0000,433900.0000,432888.0000,432888.0000,432337.0000,432538.0000,433239.0000,433930.0000,517268.0000,434752.0000,421917.0000,410545.0000,410035.0000,410656.0000,410084.0000,410155.0000,409954.0000,410536.0000,414754.0000,410777.0000,410115.0000,410686.0000,410245.0000,410566.0000,410716.0000,410385.0000,410526.0000,410285.0000,414714.0000,410536.0000,410746.0000,410967.0000,410756.0000,410807.0000,410636.0000,410957.0000,410896.0000,411508.0000,411047.0000,411007.0000,410956.0000,409774.0000,409905.0000,410786.0000,410436.0000,410817.0000,410696.0000,415816.0000,410496.0000,411046.0000,410485.0000,410576.0000,410636.0000,410927.0000,410656.0000,410406.0000,410766.0000,414844.0000,411147.0000,410666.0000,410767.0000,410526.0000,410856.0000,410396.0000,410606.0000,410726.0000,414073.0000,412269.0000,410335.0000,410555.0000,410516.0000,411017.0000,410416.0000,410696.0000,410416.0000,410926.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,48814600,475606.0000,473136.3600,478372.4000,13333.5796,12074.2126,14346.7723,"493894.0000,493483.0000,505716.0000,496799.0000,495086.0000,496599.0000,494465.0000,494314.0000,495176.0000,494625.0000,494575.0000,493834.0000,494826.0000,499464.0000,493854.0000,493473.0000,494174.0000,493543.0000,494675.0000,494505.0000,494204.0000,478625.0000,466973.0000,466793.0000,465991.0000,466081.0000,465460.0000,466562.0000,465751.0000,472042.0000,467533.0000,466221.0000,466472.0000,466562.0000,465901.0000,465460.0000,465259.0000,464919.0000,472103.0000,466522.0000,465960.0000,465920.0000,465190.0000,465530.0000,465600.0000,465721.0000,468415.0000,466913.0000,466291.0000,465910.0000,465821.0000,465450.0000,466261.0000,465721.0000,466091.0000,468325.0000,466673.0000,466081.0000,465941.0000,466141.0000,465470.0000,465570.0000,465781.0000,465349.0000,473275.0000,465029.0000,466211.0000,466252.0000,465650.0000,466332.0000,466502.0000,465790.0000,473095.0000,466983.0000,465871.0000,466201.0000,465690.0000,465600.0000,465750.0000,466472.0000,465981.0000,466983.0000,466401.0000,466712.0000,465800.0000,466051.0000,466000.0000,466562.0000,466161.0000,475789.0000,495728.0000,494725.0000,495126.0000,493934.0000,494445.0000,494225.0000,494635.0000,499144.0000,493563.0000,493293.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,52433300,524375.5700,520466.3500,528085.9900,19386.3601,18389.6351,20190.9230,"506628.0000,499674.0000,555751.0000,541013.0000,540883.0000,539250.0000,539771.0000,540522.0000,540733.0000,540071.0000,539891.0000,538789.0000,539650.0000,540431.0000,540061.0000,538358.0000,540552.0000,538529.0000,539590.0000,539610.0000,538668.0000,538779.0000,537587.0000,542295.0000,539531.0000,539420.0000,540171.0000,540552.0000,538768.0000,539571.0000,539931.0000,539069.0000,538920.0000,539059.0000,540032.0000,539219.0000,539270.0000,538729.0000,541605.0000,540902.0000,540972.0000,539219.0000,540973.0000,539290.0000,539260.0000,545471.0000,539099.0000,540562.0000,540132.0000,540552.0000,540031.0000,540813.0000,539110.0000,549469.0000,541674.0000,538649.0000,540822.0000,539640.0000,539480.0000,539440.0000,540913.0000,517569.0000,501478.0000,500887.0000,499755.0000,499966.0000,501438.0000,499464.0000,505726.0000,502259.0000,499805.0000,500696.0000,501188.0000,499695.0000,500416.0000,500366.0000,504985.0000,499374.0000,499283.0000,500025.0000,501498.0000,499174.0000,500637.0000,499163.0000,506478.0000,500516.0000,500386.0000,500536.0000,499735.0000,500246.0000,497991.0000,500447.0000,503562.0000,500436.0000,501368.0000,499805.0000,500647.0000,500786.0000,500776.0000,501589.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,322037300,3187415.3100,3171931.4200,3199785.3800,70663.7104,58070.2093,82078.8070,"3221902.0000,3221411.0000,3053754.0000,3038916.0000,3035529.0000,3025400.0000,3024237.0000,3141369.0000,3222584.0000,3227252.0000,3221172.0000,3213115.0000,3215751.0000,3212885.0000,3211833.0000,3219397.0000,3213246.0000,3212825.0000,3224237.0000,3225018.0000,3222553.0000,3216011.0000,3220490.0000,3225188.0000,3223566.0000,3221952.0000,3218616.0000,3222072.0000,3222002.0000,3219148.0000,3222733.0000,3224917.0000,3225509.0000,3223064.0000,3224127.0000,3222724.0000,3223134.0000,3217423.0000,3219728.0000,3221492.0000,3216071.0000,3301353.0000,3223646.0000,3223044.0000,3181807.0000,3028686.0000,3027383.0000,3027113.0000,3154364.0000,3222223.0000,3213335.0000,3214127.0000,3211933.0000,3214769.0000,3208998.0000,3038215.0000,3029668.0000,3027163.0000,3029728.0000,3021503.0000,3017835.0000,3031232.0000,3134877.0000,3229797.0000,3221230.0000,3217032.0000,3223395.0000,3222493.0000,3221662.0000,3215890.0000,3217825.0000,3214538.0000,3220910.0000,3222803.0000,3164324.0000,3035589.0000,3191033.0000,3223906.0000,3221101.0000,3214899.0000,3209799.0000,3214790.0000,3225499.0000,3222543.0000,3228204.0000,3223825.0000,3226010.0000,3228776.0000,3221592.0000,3223666.0000,3220990.0000,3220971.0000,3222803.0000,3224256.0000,3221111.0000,3221321.0000,3219748.0000,3218726.0000,3227453.0000,3223666.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,91775400,918443.7000,917737.1900,920552.2100,5688.7143,2383.0616,12438.1429,"918549.0000,923427.0000,925311.0000,918157.0000,915633.0000,916915.0000,914631.0000,916395.0000,915142.0000,923167.0000,916404.0000,917496.0000,918367.0000,917957.0000,925792.0000,918008.0000,919680.0000,916756.0000,921313.0000,916164.0000,916114.0000,915803.0000,922075.0000,915934.0000,914390.0000,916415.0000,915001.0000,921534.0000,917166.0000,915162.0000,916194.0000,920642.0000,916164.0000,916705.0000,916024.0000,916324.0000,919230.0000,919700.0000,918518.0000,918589.0000,969314.0000,920362.0000,917717.0000,918448.0000,918438.0000,916966.0000,915282.0000,916134.0000,916194.0000,917266.0000,916484.0000,916765.0000,917306.0000,919350.0000,916454.0000,916073.0000,915713.0000,919681.0000,916214.0000,918388.0000,918429.0000,918528.0000,918889.0000,918709.0000,917056.0000,915121.0000,921754.0000,917697.0000,915412.0000,916314.0000,921574.0000,916975.0000,915232.0000,916805.0000,917577.0000,923638.0000,917726.0000,917135.0000,918739.0000,924079.0000,916264.0000,916324.0000,915463.0000,921303.0000,914742.0000,915302.0000,915603.0000,916405.0000,922395.0000,916304.0000,915562.0000,914431.0000,920852.0000,917566.0000,917526.0000,918878.0000,919319.0000,922596.0000,918949.0000,919700.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,290533000,2881294.9400,2837976.6600,2922191.8900,214528.5858,193160.2740,235487.8547,"2559868.0000,2583202.0000,3239175.0000,3222473.0000,3204549.0000,3187297.0000,3170555.0000,3163492.0000,3139476.0000,3154926.0000,3232111.0000,3226871.0000,3179683.0000,3099741.0000,3022885.0000,2956639.0000,2992036.0000,2961258.0000,3019920.0000,2952101.0000,2977038.0000,2951941.0000,2966989.0000,2963443.0000,2954646.0000,2967180.0000,2978511.0000,2972670.0000,3032122.0000,3119608.0000,3133664.0000,3162179.0000,2919740.0000,2938486.0000,3086616.0000,3046960.0000,2950387.0000,2973612.0000,2944166.0000,2943525.0000,2924640.0000,2936742.0000,2928185.0000,2938234.0000,2929979.0000,3094130.0000,2979623.0000,2767772.0000,2566851.0000,2580537.0000,2563164.0000,2543607.0000,2535601.0000,2544158.0000,2539790.0000,2558816.0000,2562483.0000,2862351.0000,2958863.0000,2806415.0000,2581418.0000,3050628.0000,2570518.0000,2531675.0000,2552995.0000,2547925.0000,2589113.0000,2578193.0000,2547685.0000,2888350.0000,2932964.0000,2928777.0000,2987578.0000,2931813.0000,2993359.0000,2931713.0000,2933606.0000,2959345.0000,3017756.0000,2961619.0000,2926833.0000,2931863.0000,2938305.0000,3039346.0000,2930220.0000,2913698.0000,2958814.0000,2904200.0000,2935820.0000,2928796.0000,3003779.0000,2811645.0000,2584945.0000,2571340.0000,2571740.0000,2579185.0000,2547344.0000,2572823.0000,2545040.0000,2543046.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,52749800,544596.8300,541820.4800,547136.0700,13549.6208,11943.0122,15577.5160,"549769.0000,556191.0000,539230.0000,575609.0000,549670.0000,526886.0000,523420.0000,552244.0000,547756.0000,552254.0000,549108.0000,552054.0000,548117.0000,524803.0000,552425.0000,522468.0000,520925.0000,551643.0000,523360.0000,522829.0000,522488.0000,520684.0000,553908.0000,548808.0000,552555.0000,551382.0000,549980.0000,551263.0000,553276.0000,552805.0000,523090.0000,521165.0000,547685.0000,556262.0000,546754.0000,555711.0000,521676.0000,549829.0000,523860.0000,547024.0000,554289.0000,554599.0000,549028.0000,520574.0000,524071.0000,550962.0000,549980.0000,553768.0000,552124.0000,549369.0000,553556.0000,518961.0000,550591.0000,556152.0000,551013.0000,549950.0000,552966.0000,547555.0000,553778.0000,522638.0000,552965.0000,549789.0000,522818.0000,547635.0000,552264.0000,552845.0000,551233.0000,521967.0000,551233.0000,549800.0000,554769.0000,553737.0000,550371.0000,552966.0000,520393.0000,549309.0000,548938.0000,556483.0000,522498.0000,547234.0000,554408.0000,552424.0000,547926.0000,525664.0000,551413.0000,550711.0000,551714.0000,550611.0000,583243.0000,548648.0000,523931.0000,551664.0000,522699.0000,547425.0000,551423.0000,555591.0000,552405.0000,550231.0000,550130.0000,521285.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,61222800,620310.3800,612799.5500,627907.0000,38647.8899,32461.2595,50987.0695,"667743.0000,640912.0000,658956.0000,637366.0000,636665.0000,638258.0000,640993.0000,635392.0000,638688.0000,611426.0000,638688.0000,613441.0000,615554.0000,627577.0000,616867.0000,613080.0000,638137.0000,661491.0000,639820.0000,604403.0000,586099.0000,607770.0000,640151.0000,633739.0000,614361.0000,607850.0000,644249.0000,633809.0000,638267.0000,621906.0000,637055.0000,605104.0000,640111.0000,612709.0000,641584.0000,609523.0000,634521.0000,640351.0000,609332.0000,639861.0000,637967.0000,637215.0000,655560.0000,642085.0000,635102.0000,609994.0000,608842.0000,640652.0000,637746.0000,608871.0000,639008.0000,635652.0000,639630.0000,637956.0000,609833.0000,608901.0000,609532.0000,638177.0000,612940.0000,633849.0000,644810.0000,635763.0000,640482.0000,636905.0000,637747.0000,611306.0000,666310.0000,639931.0000,646042.0000,634239.0000,640561.0000,585517.0000,582642.0000,695596.0000,674145.0000,788392.0000,532076.0000,515415.0000,541935.0000,522838.0000,536354.0000,555971.0000,571471.0000,575628.0000,564077.0000,580518.0000,566972.0000,575469.0000,560460.0000,577001.0000,570509.0000,569838.0000,578103.0000,577352.0000,569968.0000,670057.0000,667021.0000,633017.0000,643056.0000,662223.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,62981800,652830.1900,646633.9500,658850.3800,31311.7238,26483.7210,37619.8583,"633648.0000,649158.0000,582852.0000,586940.0000,562033.0000,570569.0000,566812.0000,582933.0000,648106.0000,660349.0000,692761.0000,638979.0000,741714.0000,667282.0000,635281.0000,667933.0000,637786.0000,690055.0000,641313.0000,664607.0000,633078.0000,666731.0000,670368.0000,669988.0000,638097.0000,695466.0000,670679.0000,666290.0000,667002.0000,635332.0000,666911.0000,638598.0000,638427.0000,641674.0000,667593.0000,723870.0000,639570.0000,636854.0000,637416.0000,668855.0000,667102.0000,665519.0000,669647.0000,669176.0000,684234.0000,634701.0000,643437.0000,695636.0000,608701.0000,670248.0000,697029.0000,668294.0000,635792.0000,642675.0000,663986.0000,677271.0000,645431.0000,700485.0000,664958.0000,639850.0000,689425.0000,666741.0000,645430.0000,637396.0000,636153.0000,698471.0000,637846.0000,723429.0000,669947.0000,638768.0000,665369.0000,640061.0000,695045.0000,637506.0000,640852.0000,636804.0000,669427.0000,697480.0000,640572.0000,639470.0000,641143.0000,630813.0000,635983.0000,640461.0000,668935.0000,636143.0000,640371.0000,636364.0000,698702.0000,699584.0000,667032.0000,635763.0000,638057.0000,636053.0000,610765.0000,665128.0000,642225.0000,634590.0000,643999.0000,636634.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,325975000,3257041.4400,3231406.5800,3280813.7800,125546.1884,111296.3168,142668.6904,"3067931.0000,3021392.0000,3576985.0000,3511581.0000,3522171.0000,3473839.0000,3399259.0000,3357389.0000,3371675.0000,3361647.0000,3370714.0000,3276736.0000,3341329.0000,3338223.0000,3338183.0000,3229286.0000,3045578.0000,3062981.0000,3209709.0000,3288538.0000,3314016.0000,3282226.0000,3303466.0000,3285963.0000,3307164.0000,3286294.0000,3305941.0000,3310159.0000,3113958.0000,3062149.0000,3065165.0000,3041922.0000,3054244.0000,3057741.0000,3110160.0000,3054555.0000,3064283.0000,3020652.0000,3213046.0000,3285923.0000,3311562.0000,3274382.0000,3287296.0000,3309078.0000,3365374.0000,3318405.0000,3324828.0000,3369401.0000,3312764.0000,3281355.0000,3317473.0000,3329777.0000,3280794.0000,3088740.0000,3082378.0000,3166738.0000,3289250.0000,3352229.0000,3297866.0000,3296333.0000,3301372.0000,3299559.0000,3307494.0000,3263361.0000,3306442.0000,3350606.0000,3389249.0000,3274702.0000,3308666.0000,3290082.0000,3329827.0000,3334816.0000,3293338.0000,3337301.0000,3370654.0000,3303046.0000,3288960.0000,3310630.0000,3277087.0000,3193719.0000,3070214.0000,3051850.0000,3037713.0000,3029849.0000,3033716.0000,3411151.0000,3192035.0000,3048414.0000,3105842.0000,3053133.0000,3239135.0000,3382606.0000,3355725.0000,3320529.0000,3311352.0000,3334636.0000,3278559.0000,3283108.0000,3337762.0000,3335708.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,94880600,986714.7800,974279.9900,999104.1200,63472.5165,55941.1135,71940.0299,"1019399.0000,1040789.0000,1029940.0000,1015983.0000,1018648.0000,1126763.0000,1010793.0000,1046331.0000,1021704.0000,1014751.0000,990154.0000,1040018.0000,981859.0000,1019740.0000,980897.0000,988852.0000,955268.0000,989453.0000,991857.0000,988641.0000,986277.0000,1015903.0000,983261.0000,982860.0000,985916.0000,988301.0000,992118.0000,987770.0000,986387.0000,987740.0000,982910.0000,993510.0000,984072.0000,1041942.0000,992588.0000,979123.0000,987319.0000,985455.0000,992218.0000,987900.0000,989653.0000,983622.0000,970367.0000,1011565.0000,1033697.0000,1010102.0000,933597.0000,870187.0000,899152.0000,906796.0000,871981.0000,874745.0000,862101.0000,899963.0000,874314.0000,895184.0000,862311.0000,879494.0000,908309.0000,891928.0000,897458.0000,885025.0000,889573.0000,884483.0000,864726.0000,958053.0000,949577.0000,958784.0000,974665.0000,966820.0000,956991.0000,960838.0000,975686.0000,974093.0000,973603.0000,961149.0000,963634.0000,948566.0000,955789.0000,984874.0000,962011.0000,1055538.0000,1066549.0000,1090404.0000,1093269.0000,1113267.0000,1067921.0000,1088991.0000,1091476.0000,1092978.0000,1092017.0000,1060107.0000,1096595.0000,1090864.0000,1089512.0000,1090955.0000,1001375.0000,1016414.0000,1016915.0000,985455.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,soup 
topology,100,1,231651100,2408020.5200,2364827.9900,2444340.5000,202784.5336,175728.0803,223954.1457,"2528188.0000,2524361.0000,2529971.0000,2522267.0000,2512007.0000,2710994.0000,2622075.0000,2333679.0000,2048509.0000,2040804.0000,2045854.0000,2039862.0000,2045093.0000,2044411.0000,2047737.0000,2043620.0000,2046746.0000,2041366.0000,2040725.0000,2039332.0000,2048479.0000,2040494.0000,2492811.0000,2512608.0000,2514512.0000,2520193.0000,2518640.0000,2508211.0000,2516846.0000,2510425.0000,2141645.0000,2043420.0000,2034643.0000,2048279.0000,2040323.0000,2094767.0000,2513881.0000,2524601.0000,2514823.0000,2520894.0000,2520453.0000,2512548.0000,2520853.0000,2516145.0000,2508010.0000,2516686.0000,2512588.0000,2065752.0000,2047517.0000,2040644.0000,2077794.0000,2511777.0000,2525774.0000,2511406.0000,2522377.0000,2522527.0000,2512879.0000,2522647.0000,2525313.0000,2512338.0000,2630411.0000,2518339.0000,2513259.0000,2520143.0000,2515434.0000,2522727.0000,2519331.0000,2513980.0000,2521044.0000,2520143.0000,2510405.0000,2518950.0000,2518289.0000,2519110.0000,2519281.0000,2513339.0000,2524952.0000,2523469.0000,2510425.0000,2528449.0000,2512318.0000,2514983.0000,2524831.0000,2516576.0000,2522928.0000,2523249.0000,2507139.0000,2515484.0000,2519030.0000,2516797.0000,2520714.0000,2507769.0000,2521916.0000,2520864.0000,2513621.0000,2519963.0000,2516505.0000,2518900.0000,2519311.0000,2513550.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,chain 
topology,100,1,35998800,382178.0700,381763.7300,382798.8500,2549.9286,1898.4866,3711.5280,"382062.0000,381862.0000,391470.0000,380729.0000,385298.0000,380959.0000,379757.0000,378786.0000,380449.0000,379497.0000,379847.0000,379898.0000,379948.0000,380218.0000,387342.0000,380760.0000,379477.0000,379126.0000,378526.0000,379647.0000,382343.0000,381781.0000,380710.0000,381691.0000,380409.0000,386761.0000,381571.0000,381340.0000,380609.0000,382493.0000,381220.0000,382443.0000,381581.0000,382753.0000,380810.0000,386621.0000,381621.0000,381120.0000,381291.0000,382162.0000,381651.0000,382112.0000,380770.0000,381290.0000,382172.0000,382052.0000,389436.0000,380429.0000,381621.0000,380840.0000,382412.0000,381421.0000,381591.0000,381251.0000,381942.0000,381370.0000,386010.0000,382483.0000,382202.0000,381651.0000,382052.0000,380449.0000,380449.0000,381050.0000,381661.0000,381401.0000,382553.0000,386821.0000,382774.0000,382062.0000,382583.0000,382022.0000,380990.0000,381551.0000,381000.0000,382112.0000,381841.0000,395938.0000,383635.0000,382222.0000,382613.0000,382904.0000,382422.0000,381561.0000,382122.0000,381881.0000,381631.0000,382362.0000,388675.0000,382603.0000,381170.0000,381782.0000,382032.0000,381090.0000,381361.0000,382803.0000,380770.0000,382352.0000,387533.0000,381280.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,45834300,455874.0900,450144.9700,460637.9200,26691.6963,23135.4057,29508.0046,"469668.0000,471893.0000,423360.0000,410796.0000,407941.0000,406659.0000,414794.0000,406308.0000,408542.0000,408802.0000,408211.0000,409193.0000,409784.0000,406448.0000,408733.0000,407059.0000,415505.0000,406929.0000,408001.0000,407981.0000,406037.0000,405907.0000,407169.0000,407971.0000,405877.0000,406478.0000,449239.0000,470089.0000,468696.0000,469998.0000,470178.0000,470209.0000,467273.0000,470109.0000,474767.0000,469928.0000,470580.0000,470419.0000,469678.0000,469116.0000,469648.0000,470519.0000,470300.0000,493042.0000,470800.0000,471652.0000,468726.0000,471101.0000,470890.0000,470520.0000,470750.0000,475920.0000,469177.0000,472243.0000,471792.0000,472242.0000,469758.0000,472253.0000,469818.0000,468516.0000,475469.0000,468896.0000,470490.0000,471010.0000,468916.0000,469357.0000,469978.0000,471331.0000,474648.0000,471090.0000,468737.0000,470720.0000,469938.0000,470229.0000,471201.0000,472022.0000,470840.0000,480418.0000,467183.0000,468145.0000,467664.0000,469788.0000,468486.0000,470179.0000,469247.0000,477513.0000,469818.0000,471371.0000,471030.0000,469578.0000,468676.0000,470289.0000,471120.0000,470881.0000,474757.0000,469617.0000,471882.0000,468976.0000,468556.0000,471371.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,49472400,495856.3300,488891.6900,501854.2800,32954.8071,28905.6761,35960.6429,"441545.0000,445031.0000,521486.0000,515434.0000,520414.0000,515625.0000,513812.0000,512770.0000,513300.0000,514232.0000,513010.0000,513210.0000,519462.0000,514252.0000,512660.0000,514042.0000,514262.0000,512169.0000,512339.0000,521075.0000,514723.0000,512610.0000,515645.0000,516296.0000,515645.0000,515555.0000,514012.0000,522428.0000,514193.0000,513090.0000,514874.0000,514082.0000,514794.0000,512539.0000,515846.0000,520554.0000,515144.0000,513762.0000,515224.0000,515295.0000,514733.0000,513311.0000,515715.0000,518360.0000,513962.0000,513341.0000,514713.0000,513200.0000,514312.0000,513961.0000,520143.0000,516387.0000,515254.0000,513150.0000,513501.0000,515294.0000,514302.0000,514753.0000,518931.0000,515966.0000,514994.0000,516156.0000,513902.0000,515404.0000,514022.0000,514092.0000,520404.0000,514092.0000,512910.0000,512449.0000,515365.0000,513271.0000,515164.0000,514343.0000,520254.0000,524101.0000,439320.0000,438990.0000,437798.0000,437417.0000,439721.0000,438980.0000,450111.0000,438859.0000,438088.0000,438959.0000,438429.0000,440653.0000,441134.0000,440002.0000,440342.0000,440493.0000,447396.0000,441575.0000,441484.0000,438800.0000,439691.0000,439871.0000,439220.0000,437647.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,230685200,2351543.2400,2321928.0900,2374336.5800,132285.5051,106562.7804,156249.4901,"2400095.0000,2406077.0000,2640070.0000,2417849.0000,2408280.0000,2398562.0000,2392461.0000,2401528.0000,2389685.0000,2399264.0000,2411356.0000,2398583.0000,2414763.0000,2397390.0000,2402701.0000,2407289.0000,2395306.0000,2403291.0000,2397610.0000,2402530.0000,2402420.0000,2400886.0000,2399735.0000,2396259.0000,2405325.0000,2404554.0000,2398742.0000,2408461.0000,2397460.0000,2399374.0000,2401267.0000,2397270.0000,2395206.0000,2029714.0000,2046636.0000,2042558.0000,2051034.0000,2038240.0000,2039391.0000,2034893.0000,2036296.0000,2038400.0000,2047768.0000,2042658.0000,2055752.0000,2040804.0000,2049691.0000,2037398.0000,2383293.0000,2415033.0000,2410925.0000,2413581.0000,2403803.0000,2404674.0000,2408752.0000,2399073.0000,2410255.0000,2401848.0000,2412619.0000,2412219.0000,2408782.0000,2402750.0000,2401909.0000,2408181.0000,2408862.0000,2400235.0000,2412829.0000,2405585.0000,2414453.0000,2408642.0000,2401418.0000,2408341.0000,2401467.0000,2415955.0000,2409583.0000,2396849.0000,2400486.0000,2390998.0000,2406488.0000,2404424.0000,2399003.0000,2404794.0000,2399985.0000,2407990.0000,2409463.0000,2399364.0000,2402951.0000,2419672.0000,2405024.0000,2392260.0000,2393342.0000,2399815.0000,2398572.0000,2390607.0000,2403021.0000,2408292.0000,2409974.0000,2405255.0000,2397741.0000,2400005.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,87948800,879728.7800,866618.9300,891707.7400,63835.7738,58907.0289,66579.1543,"923909.0000,925662.0000,842835.0000,795626.0000,794333.0000,793131.0000,790455.0000,800765.0000,792890.0000,795065.0000,790936.0000,791829.0000,799352.0000,792029.0000,790366.0000,792460.0000,792850.0000,801517.0000,787140.0000,792149.0000,788682.0000,786579.0000,798911.0000,796517.0000,793762.0000,794523.0000,789103.0000,791136.0000,800484.0000,793031.0000,791277.0000,792069.0000,793872.0000,801397.0000,794092.0000,791287.0000,794032.0000,793100.0000,923488.0000,930081.0000,923297.0000,928076.0000,935801.0000,929720.0000,926834.0000,926013.0000,937384.0000,930561.0000,929479.0000,924831.0000,929799.0000,925020.0000,924710.0000,926012.0000,925682.0000,933787.0000,928969.0000,925140.0000,929188.0000,931493.0000,927125.0000,927365.0000,924730.0000,929880.0000,928908.0000,930311.0000,926353.0000,924229.0000,931834.0000,928718.0000,925451.0000,926814.0000,930130.0000,928567.0000,927435.0000,925070.0000,929229.0000,925882.0000,927024.0000,923378.0000,925090.0000,931392.0000,924760.0000,928678.0000,924640.0000,928527.0000,922505.0000,922105.0000,923838.0000,930732.0000,923678.0000,924379.0000,922797.0000,920803.0000,931102.0000,926263.0000,927656.0000,930541.0000,934669.0000,925772.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,soup 
topology,100,1,191089500,1799572.2300,1757462.7600,1845403.6000,224525.5135,189036.8945,270138.7390,"1860062.0000,1888126.0000,2374728.0000,2410144.0000,2381279.0000,2382433.0000,1853149.0000,1863228.0000,1848580.0000,1854992.0000,1837549.0000,1879037.0000,1861434.0000,1886141.0000,1860142.0000,1869180.0000,1949101.0000,1896912.0000,1515960.0000,1861755.0000,2503430.0000,2367934.0000,1595982.0000,1591032.0000,1586674.0000,1577697.0000,1753419.0000,1607303.0000,1515740.0000,1504398.0000,1451709.0000,1667918.0000,1928692.0000,2265009.0000,1934152.0000,1874459.0000,1872215.0000,1836577.0000,1493077.0000,1503046.0000,1510279.0000,1498728.0000,1526942.0000,1497034.0000,1485944.0000,1514347.0000,1509838.0000,1491364.0000,1491013.0000,1474792.0000,1464463.0000,1471446.0000,1433714.0000,1506452.0000,1459403.0000,1492506.0000,1767176.0000,1888376.0000,1860152.0000,1886913.0000,1858540.0000,1851756.0000,1869239.0000,1847678.0000,1887003.0000,1860864.0000,1860834.0000,1852718.0000,1858940.0000,1862076.0000,1856034.0000,1798135.0000,1858589.0000,1833281.0000,1857607.0000,1888626.0000,1881664.0000,1840205.0000,1848560.0000,1860733.0000,1848620.0000,1860002.0000,1866734.0000,1807452.0000,1861856.0000,1831648.0000,1863879.0000,1848510.0000,1863148.0000,1852569.0000,1894538.0000,1846126.0000,1836858.0000,1831958.0000,1854712.0000,1830576.0000,1882785.0000,1860262.0000,1828222.0000,1858409.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,chain 
topology,100,1,53654700,601158.8100,583612.9600,626567.1500,106606.9254,80174.1365,132301.5416,"579476.0000,581039.0000,940220.0000,898250.0000,900585.0000,870427.0000,929238.0000,900003.0000,871770.0000,897800.0000,900845.0000,869897.0000,931042.0000,549499.0000,579866.0000,552865.0000,579285.0000,551763.0000,553086.0000,549328.0000,579576.0000,583955.0000,578504.0000,551623.0000,580588.0000,583593.0000,547064.0000,551001.0000,580908.0000,580498.0000,549329.0000,582422.0000,555661.0000,577092.0000,579666.0000,550411.0000,610655.0000,554499.0000,547796.0000,550471.0000,553918.0000,577362.0000,581530.0000,579716.0000,553496.0000,550431.0000,581039.0000,553287.0000,578514.0000,553957.0000,578704.0000,579356.0000,582501.0000,549739.0000,581680.0000,550121.0000,552274.0000,580347.0000,580467.0000,581369.0000,550411.0000,551403.0000,580398.0000,551192.0000,579105.0000,580819.0000,581881.0000,550261.0000,551573.0000,551724.0000,581139.0000,577301.0000,552134.0000,553016.0000,552275.0000,549079.0000,550481.0000,555321.0000,551022.0000,551923.0000,550912.0000,581440.0000,576911.0000,553907.0000,549579.0000,553427.0000,550752.0000,548978.0000,553987.0000,550561.0000,551082.0000,554258.0000,550351.0000,552024.0000,577202.0000,552485.0000,554388.0000,576020.0000,582572.0000,551183.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,67589500,707736.0100,703223.5800,712440.9900,23495.6254,20142.9228,28388.1335,"694013.0000,686399.0000,699633.0000,638408.0000,652404.0000,656171.0000,666531.0000,651302.0000,766741.0000,686298.0000,697439.0000,697860.0000,694925.0000,698101.0000,759878.0000,689034.0000,728358.0000,724671.0000,729560.0000,733488.0000,719150.0000,729941.0000,697179.0000,728378.0000,685688.0000,782450.0000,729831.0000,698652.0000,694514.0000,690757.0000,700876.0000,685457.0000,725432.0000,690216.0000,689555.0000,691248.0000,759557.0000,696888.0000,701697.0000,691278.0000,696417.0000,715634.0000,698681.0000,693221.0000,697159.0000,694834.0000,723899.0000,728969.0000,728138.0000,717688.0000,732636.0000,719280.0000,685587.0000,737055.0000,748506.0000,695666.0000,722126.0000,709081.0000,696518.0000,692861.0000,729169.0000,726144.0000,732275.0000,692090.0000,705565.0000,717287.0000,722407.0000,699824.0000,697660.0000,691188.0000,731153.0000,692570.0000,695486.0000,688503.0000,726895.0000,725663.0000,694644.0000,727727.0000,727646.0000,704893.0000,718729.0000,736082.0000,720473.0000,693843.0000,729881.0000,691919.0000,730301.0000,722396.0000,699673.0000,694344.0000,727486.0000,692951.0000,699333.0000,725453.0000,700826.0000,691518.0000,699293.0000,695416.0000,710264.0000,690717.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,70057000,753270.5800,736043.7300,779084.5900,105691.3216,76436.8033,135746.7422,"716976.0000,732916.0000,1033055.0000,1121172.0000,1072490.0000,1103248.0000,1069815.0000,1131592.0000,1080164.0000,1099011.0000,1040610.0000,727366.0000,720574.0000,693512.0000,727206.0000,698641.0000,728899.0000,755199.0000,685767.0000,731123.0000,685446.0000,726634.0000,757643.0000,723859.0000,738787.0000,722918.0000,723669.0000,729991.0000,720774.0000,728959.0000,696868.0000,688142.0000,722056.0000,754938.0000,698050.0000,734710.0000,700495.0000,749408.0000,759607.0000,723238.0000,695015.0000,729100.0000,710925.0000,727245.0000,752884.0000,724049.0000,766630.0000,755900.0000,728228.0000,694394.0000,695145.0000,725362.0000,753936.0000,722066.0000,729360.0000,723799.0000,699644.0000,721746.0000,723810.0000,701317.0000,702269.0000,717588.0000,730893.0000,722557.0000,724039.0000,720002.0000,725593.0000,724802.0000,734199.0000,695767.0000,704172.0000,724150.0000,722427.0000,723689.0000,701246.0000,687029.0000,731283.0000,736533.0000,746452.0000,723980.0000,724090.0000,695967.0000,691909.0000,718549.0000,721364.0000,712818.0000,697729.0000,712317.0000,698942.0000,718630.0000,712458.0000,721184.0000,721696.0000,718419.0000,744578.0000,715804.0000,715975.0000,713670.0000,714411.0000,721795.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,232474800,2312390.3700,2287394.6300,2332718.1600,114343.3491,95970.3053,132063.6003,"2245292.0000,2286370.0000,2352425.0000,2351283.0000,2350291.0000,2353456.0000,2403812.0000,2345492.0000,2359138.0000,2415926.0000,2348137.0000,2354509.0000,2409172.0000,2408361.0000,2348377.0000,2381660.0000,2406077.0000,2354599.0000,2404785.0000,2358858.0000,2366642.0000,2408691.0000,2356442.0000,2351813.0000,2407579.0000,2384085.0000,2383363.0000,2358126.0000,2409363.0000,2376000.0000,2379937.0000,2442665.0000,2372393.0000,2388433.0000,2358958.0000,2344400.0000,2307470.0000,2270199.0000,2255922.0000,2266662.0000,2282934.0000,2271501.0000,2267204.0000,2245021.0000,2264509.0000,2389155.0000,2279467.0000,2272483.0000,2278635.0000,2328078.0000,2401888.0000,2378915.0000,2388123.0000,2352846.0000,2410886.0000,2372072.0000,2390698.0000,2377963.0000,2352745.0000,2380668.0000,2378705.0000,2371782.0000,2392872.0000,2382042.0000,2403732.0000,2413871.0000,2379586.0000,2349930.0000,2349700.0000,2383514.0000,2404533.0000,2391138.0000,2376811.0000,2370760.0000,2121037.0000,2041376.0000,2046165.0000,2054370.0000,2070120.0000,2027860.0000,2075691.0000,2078907.0000,2063287.0000,2054700.0000,2063627.0000,2082032.0000,2069519.0000,2080409.0000,2043740.0000,2144792.0000,2355451.0000,2370920.0000,2351543.0000,2376340.0000,2349640.0000,2380428.0000,2430002.0000,2364858.0000,2347185.0000,2333108.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,jacobi 
topology,100,1,102857100,1089893.9300,1066549.3800,1128354.8800,149434.2238,92664.8029,203540.9498,"1048194.0000,1044426.0000,1676334.0000,1681013.0000,1668228.0000,1678448.0000,1669150.0000,1646167.0000,989062.0000,1028697.0000,999242.0000,987609.0000,1000654.0000,1006184.0000,992558.0000,992909.0000,1014571.0000,1018187.0000,1004912.0000,990795.0000,995164.0000,1012676.0000,990325.0000,986667.0000,1065567.0000,1070116.0000,1076888.0000,1073582.0000,1079965.0000,1045599.0000,1040689.0000,1078461.0000,1044577.0000,1044307.0000,1059345.0000,1089042.0000,1043485.0000,1039477.0000,1074454.0000,1073041.0000,1077860.0000,1078602.0000,1074043.0000,1074062.0000,1044647.0000,1043566.0000,1104941.0000,1045078.0000,1044828.0000,1044647.0000,1074103.0000,1044387.0000,1047673.0000,1043716.0000,1074945.0000,1066509.0000,1075085.0000,1075536.0000,1105583.0000,1041822.0000,1049606.0000,1042222.0000,1103168.0000,1046150.0000,1073391.0000,1101234.0000,1049145.0000,1040900.0000,1047693.0000,1074965.0000,1074945.0000,1072721.0000,1050077.0000,1064044.0000,1074364.0000,1102998.0000,1109009.0000,1042212.0000,1072900.0000,1047292.0000,1073382.0000,1074073.0000,1074944.0000,1045508.0000,1046360.0000,1041732.0000,1046291.0000,1050328.0000,1068662.0000,1074884.0000,1102747.0000,1044718.0000,1047372.0000,1073562.0000,1041291.0000,1045499.0000,1078552.0000,1069715.0000,1046000.0000,1102337.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,351211900,3306354.0000,3260527.4500,3350573.2900,229040.3164,219758.0131,236200.4717,"3515128.0000,3517602.0000,3530797.0000,3511422.0000,3514175.0000,3150967.0000,3045979.0000,3036641.0000,3037372.0000,3031633.0000,3031541.0000,3029748.0000,3040048.0000,3040428.0000,3035630.0000,3402604.0000,3512533.0000,3525307.0000,3525487.0000,3513264.0000,3519375.0000,3511321.0000,3518705.0000,3514256.0000,3508676.0000,3512092.0000,3514015.0000,3517633.0000,3512223.0000,3512112.0000,3523052.0000,3518454.0000,3519185.0000,3515839.0000,3512964.0000,3613253.0000,3516721.0000,3516320.0000,3517112.0000,3520879.0000,3426981.0000,3044546.0000,3039507.0000,3034427.0000,3029277.0000,3037343.0000,3040638.0000,3035509.0000,3049285.0000,3045718.0000,3184872.0000,3512643.0000,3510218.0000,3511641.0000,3510369.0000,3272729.0000,3038304.0000,3037122.0000,3031802.0000,3338925.0000,3522802.0000,3507854.0000,3514206.0000,3518264.0000,3516440.0000,3505430.0000,3511902.0000,3508716.0000,3515298.0000,3512482.0000,3515539.0000,3513806.0000,3520468.0000,3512452.0000,3514386.0000,3515268.0000,3326561.0000,3036401.0000,3046390.0000,3031581.0000,3038204.0000,3038736.0000,3039105.0000,3032773.0000,3034849.0000,3040148.0000,3157249.0000,3084932.0000,3037994.0000,3041520.0000,3042943.0000,3034057.0000,3041290.0000,3034868.0000,3038875.0000,3037844.0000,3034066.0000,3434876.0000,3519857.0000,3510619.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,67477900,670709.9200,666684.0700,676170.0900,23487.3248,16169.1972,40054.8005,"827105.0000,625573.0000,673013.0000,624351.0000,621205.0000,620694.0000,620985.0000,628559.0000,622547.0000,620945.0000,621355.0000,619682.0000,620774.0000,621857.0000,668745.0000,675207.0000,675568.0000,674777.0000,674927.0000,674866.0000,675328.0000,676119.0000,677682.0000,674426.0000,675207.0000,674015.0000,681950.0000,675748.0000,674907.0000,675187.0000,673805.0000,679465.0000,676380.0000,676069.0000,674536.0000,674957.0000,674917.0000,680227.0000,675438.0000,673744.0000,674236.0000,673735.0000,675197.0000,677051.0000,677151.0000,674807.0000,674466.0000,676059.0000,675969.0000,691438.0000,675207.0000,674717.0000,676199.0000,675257.0000,674787.0000,681078.0000,675518.0000,674997.0000,673424.0000,674426.0000,675438.0000,677161.0000,673725.0000,674355.0000,675057.0000,675037.0000,673894.0000,677091.0000,675328.0000,674927.0000,675197.0000,674385.0000,675087.0000,680788.0000,675778.0000,673234.0000,674095.0000,676019.0000,674025.0000,676841.0000,673804.0000,673715.0000,675087.0000,675127.0000,675057.0000,676270.0000,676910.0000,676961.0000,675237.0000,674396.0000,674677.0000,678944.0000,674606.0000,675558.0000,674657.0000,674646.0000,674787.0000,675828.0000,674246.0000,674456.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,75523100,753557.8100,747433.6400,759043.0300,29506.3949,26767.9135,31275.5732,"711636.0000,710905.0000,783853.0000,774376.0000,776158.0000,771220.0000,773794.0000,773784.0000,773664.0000,771610.0000,772181.0000,773704.0000,779044.0000,771911.0000,772041.0000,773534.0000,772812.0000,777722.0000,772892.0000,773844.0000,774415.0000,774135.0000,774335.0000,773734.0000,773885.0000,774465.0000,772832.0000,775407.0000,772571.0000,772181.0000,770487.0000,772892.0000,778723.0000,776419.0000,773844.0000,773173.0000,772221.0000,771490.0000,780577.0000,771430.0000,772071.0000,771710.0000,771310.0000,772802.0000,772572.0000,769296.0000,770568.0000,768835.0000,779505.0000,772311.0000,771229.0000,771499.0000,770328.0000,779705.0000,774015.0000,772652.0000,772892.0000,774676.0000,780226.0000,770378.0000,774025.0000,771930.0000,773222.0000,772491.0000,777501.0000,772321.0000,771700.0000,772912.0000,773354.0000,776900.0000,711546.0000,710354.0000,708971.0000,709292.0000,712077.0000,710323.0000,710814.0000,708150.0000,707959.0000,708630.0000,716666.0000,708961.0000,708962.0000,707158.0000,709041.0000,708120.0000,716746.0000,710103.0000,708551.0000,710424.0000,710905.0000,715554.0000,710244.0000,711266.0000,711726.0000,709292.0000,708771.0000,718308.0000,711115.0000,710915.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,79239200,812828.4300,807388.2500,816587.4700,22734.8664,16715.8736,28719.1965,"817477.0000,817497.0000,754327.0000,741693.0000,742024.0000,740801.0000,747735.0000,738928.0000,740922.0000,742074.0000,742425.0000,765258.0000,819661.0000,819180.0000,822236.0000,818729.0000,837395.0000,819350.0000,822215.0000,820062.0000,819912.0000,826784.0000,821244.0000,817828.0000,817437.0000,819190.0000,825843.0000,819280.0000,817958.0000,819561.0000,818609.0000,825472.0000,818810.0000,819110.0000,820272.0000,819922.0000,823067.0000,819290.0000,821474.0000,818539.0000,821765.0000,819601.0000,818459.0000,818549.0000,818879.0000,821033.0000,819250.0000,817246.0000,818368.0000,817336.0000,827065.0000,819932.0000,817206.0000,817247.0000,819591.0000,823458.0000,817156.0000,818759.0000,819100.0000,820363.0000,823208.0000,818538.0000,819891.0000,819070.0000,821665.0000,825332.0000,819991.0000,819140.0000,819681.0000,818047.0000,822366.0000,819430.0000,818068.0000,816465.0000,816355.0000,820042.0000,821234.0000,818338.0000,818158.0000,824630.0000,821445.0000,818599.0000,819591.0000,819361.0000,819731.0000,817467.0000,817156.0000,818058.0000,820994.0000,828668.0000,822867.0000,824009.0000,819892.0000,820382.0000,825833.0000,820583.0000,818298.0000,817147.0000,818488.0000,824671.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,437327500,4345765.2400,4310673.8200,4376775.2500,167609.0507,150926.8494,179446.5480,"4458406.0000,4457373.0000,4297480.0000,4486258.0000,4423930.0000,4093373.0000,4090478.0000,4088143.0000,4090668.0000,4088204.0000,4081160.0000,4086650.0000,4417888.0000,4471549.0000,4467432.0000,4468444.0000,4463424.0000,4452163.0000,4457774.0000,4470047.0000,4457232.0000,4459427.0000,4465859.0000,4460739.0000,4462042.0000,4461430.0000,4470929.0000,4453155.0000,4454187.0000,4457554.0000,4463785.0000,4457673.0000,4450059.0000,4464777.0000,4461721.0000,4453345.0000,4458746.0000,4461761.0000,4452354.0000,4456942.0000,4454588.0000,4456972.0000,4457263.0000,4452304.0000,4454417.0000,4465017.0000,4462853.0000,4465529.0000,4132076.0000,4096599.0000,4091379.0000,4085378.0000,4086430.0000,4091249.0000,4087712.0000,4148818.0000,4400815.0000,4086059.0000,4088894.0000,4090268.0000,4076361.0000,4075459.0000,4076111.0000,4077644.0000,4255109.0000,4464547.0000,4458495.0000,4463795.0000,4454247.0000,4466129.0000,4455459.0000,4451953.0000,4466010.0000,4457303.0000,4475617.0000,4460429.0000,4460650.0000,4459577.0000,4460268.0000,4456401.0000,4462433.0000,4462774.0000,4462923.0000,4462182.0000,4255981.0000,4083705.0000,4085739.0000,4084336.0000,4081260.0000,4086229.0000,4070029.0000,4087503.0000,4420774.0000,4471660.0000,4473082.0000,4458335.0000,4462383.0000,4463144.0000,4470377.0000,4450930.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,142369100,1406663.1300,1394106.5400,1416754.0700,57110.5336,48621.9921,64164.7313,"1439816.0000,1433594.0000,1442180.0000,1439685.0000,1436018.0000,1430679.0000,1441329.0000,1433955.0000,1433343.0000,1439495.0000,1431209.0000,1437571.0000,1430368.0000,1433524.0000,1436830.0000,1432442.0000,1435388.0000,1433725.0000,1435688.0000,1434385.0000,1434926.0000,1432993.0000,1434095.0000,1435487.0000,1433675.0000,1438443.0000,1429577.0000,1296444.0000,1304580.0000,1296975.0000,1298668.0000,1299370.0000,1294049.0000,1296004.0000,1301874.0000,1293428.0000,1295843.0000,1288429.0000,1298999.0000,1295142.0000,1291996.0000,1296544.0000,1297947.0000,1297055.0000,1299961.0000,1293098.0000,1292517.0000,1297726.0000,1387306.0000,1438183.0000,1444314.0000,1437902.0000,1438674.0000,1437952.0000,1438103.0000,1448102.0000,1438944.0000,1437602.0000,1438734.0000,1433995.0000,1437411.0000,1440247.0000,1437783.0000,1431861.0000,1438824.0000,1434155.0000,1440558.0000,1433093.0000,1434235.0000,1438023.0000,1435959.0000,1437541.0000,1442301.0000,1434245.0000,1432662.0000,1440567.0000,1430969.0000,1433845.0000,1441339.0000,1435337.0000,1440377.0000,1434074.0000,1436149.0000,1439184.0000,1436108.0000,1435819.0000,1441329.0000,1437030.0000,1431992.0000,1441138.0000,1433986.0000,1441259.0000,1439335.0000,1435848.0000,1438484.0000,1435337.0000,1435518.0000,1438483.0000,1431510.0000,1435518.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,278222500,2858993.2300,2833342.6700,2876387.1100,106696.4422,77679.8162,137295.5041,"2909710.0000,2878031.0000,3005132.0000,2961409.0000,2915141.0000,2968843.0000,2848976.0000,2915782.0000,2827094.0000,2905854.0000,3017685.0000,2937753.0000,2963532.0000,2900223.0000,2974444.0000,2891035.0000,2841010.0000,2903479.0000,2906504.0000,2873442.0000,2872210.0000,2908298.0000,2876137.0000,2897477.0000,2903819.0000,2879554.0000,2865257.0000,2873282.0000,2879374.0000,2871098.0000,2904892.0000,2900413.0000,2876087.0000,2885585.0000,2890404.0000,2871749.0000,2909541.0000,2867560.0000,2876427.0000,2875536.0000,2902637.0000,2900373.0000,2877330.0000,2872521.0000,2871729.0000,2879994.0000,2883731.0000,2869584.0000,2891917.0000,2905473.0000,2883271.0000,2866218.0000,2877530.0000,2875356.0000,2902848.0000,2875687.0000,2904882.0000,2907427.0000,2866499.0000,2587400.0000,2488292.0000,2499784.0000,2504783.0000,2502589.0000,2510274.0000,2515013.0000,2513401.0000,2787920.0000,2843545.0000,2901906.0000,2854426.0000,2869154.0000,2910713.0000,2868493.0000,2864205.0000,2859435.0000,2899220.0000,2898098.0000,2874895.0000,2885876.0000,2889804.0000,2879113.0000,2843335.0000,2880836.0000,2862210.0000,2886256.0000,2867501.0000,2914259.0000,2859706.0000,2877389.0000,2875406.0000,2866469.0000,2939237.0000,2875546.0000,2877991.0000,2868923.0000,2908698.0000,2902296.0000,2852031.0000,2888109.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,54862900,593733.4900,572308.5100,619254.8600,118918.2545,101533.7307,137279.0557,"579296.0000,584004.0000,853776.0000,840520.0000,707629.0000,666571.0000,695896.0000,692069.0000,692780.0000,697350.0000,678323.0000,674125.0000,744018.0000,743938.0000,673644.0000,446113.0000,455351.0000,473244.0000,450962.0000,458076.0000,455732.0000,461182.0000,455541.0000,459569.0000,450331.0000,453247.0000,460190.0000,458787.0000,454339.0000,459037.0000,458186.0000,559368.0000,860418.0000,829209.0000,838337.0000,865518.0000,868884.0000,866490.0000,868825.0000,875035.0000,878923.0000,839388.0000,706657.0000,536364.0000,540331.0000,520384.0000,517369.0000,525724.0000,508251.0000,514192.0000,500436.0000,517308.0000,543658.0000,540792.0000,516486.0000,506809.0000,518119.0000,521727.0000,516006.0000,526054.0000,520455.0000,535883.0000,522748.0000,514132.0000,507289.0000,501518.0000,521516.0000,614132.0000,584165.0000,579907.0000,579496.0000,548377.0000,614472.0000,580047.0000,579706.0000,578875.0000,580748.0000,580157.0000,581009.0000,577893.0000,582291.0000,584365.0000,580748.0000,577181.0000,578294.0000,584426.0000,580328.0000,581460.0000,577522.0000,581059.0000,582351.0000,579646.0000,578104.0000,582862.0000,582591.0000,574186.0000,583363.0000,582541.0000,577993.0000,578625.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,70492500,724724.0100,705917.7600,752793.2400,115394.9452,85019.3525,148501.9323,"648898.0000,665740.0000,1132123.0000,1093329.0000,1080074.0000,1072500.0000,1076447.0000,1075266.0000,1104040.0000,1138665.0000,710474.0000,723780.0000,734640.0000,724972.0000,753466.0000,719672.0000,730813.0000,718279.0000,733127.0000,728057.0000,754568.0000,723208.0000,725223.0000,725262.0000,711245.0000,710283.0000,695546.0000,709673.0000,728929.0000,675638.0000,725322.0000,717006.0000,742114.0000,692279.0000,732496.0000,688172.0000,685367.0000,723319.0000,719451.0000,727587.0000,723679.0000,726564.0000,727726.0000,691688.0000,723128.0000,702559.0000,694343.0000,726003.0000,809132.0000,722818.0000,727516.0000,727066.0000,697500.0000,734139.0000,712769.0000,700956.0000,725913.0000,725914.0000,723819.0000,724390.0000,730712.0000,720163.0000,678033.0000,645451.0000,642064.0000,653646.0000,657344.0000,655179.0000,648848.0000,654007.0000,655109.0000,653045.0000,688482.0000,665489.0000,653156.0000,643718.0000,652644.0000,666331.0000,647074.0000,680688.0000,672662.0000,647584.0000,661812.0000,682611.0000,647455.0000,645772.0000,660730.0000,640612.0000,662423.0000,647024.0000,652194.0000,645160.0000,640421.0000,653717.0000,662312.0000,640802.0000,664467.0000,635422.0000,643046.0000,650320.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,71930100,793321.2300,774146.7800,830962.1400,131366.8724,82635.8554,233338.1395,"755840.0000,670448.0000,908719.0000,991166.0000,1079102.0000,1070506.0000,1108618.0000,1068132.0000,1081768.0000,1100293.0000,1097758.0000,749198.0000,718229.0000,790616.0000,753265.0000,727036.0000,754217.0000,783453.0000,745380.0000,750890.0000,726043.0000,728137.0000,766590.0000,721786.0000,787901.0000,750741.0000,724280.0000,750820.0000,773113.0000,738587.0000,719762.0000,745981.0000,748025.0000,772361.0000,774115.0000,766350.0000,770097.0000,747745.0000,746182.0000,772922.0000,727657.0000,731234.0000,748647.0000,754608.0000,739619.0000,759126.0000,746753.0000,730712.0000,750500.0000,758405.0000,785095.0000,753475.0000,750600.0000,730051.0000,756471.0000,746823.0000,758615.0000,756331.0000,759237.0000,783913.0000,781519.0000,722848.0000,1738111.0000,784495.0000,751251.0000,783903.0000,751322.0000,820403.0000,779775.0000,754848.0000,757754.0000,720874.0000,758835.0000,740641.0000,721495.0000,734469.0000,808770.0000,761631.0000,789173.0000,753967.0000,761831.0000,747364.0000,758906.0000,751572.0000,750430.0000,784995.0000,781519.0000,756602.0000,759267.0000,751231.0000,782030.0000,759126.0000,749347.0000,762262.0000,780537.0000,750410.0000,850208.0000,747514.0000,756371.0000,810473.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,315928300,3216111.1600,3200904.9700,3226313.5800,62676.6736,44463.8159,86204.3816,"3219548.0000,3197757.0000,3294971.0000,3290733.0000,3337611.0000,3195432.0000,3201874.0000,3225739.0000,3248643.0000,3221101.0000,3247661.0000,3260876.0000,3246058.0000,3253412.0000,3224337.0000,3220530.0000,3194231.0000,3252841.0000,3240467.0000,3291013.0000,3215931.0000,3222203.0000,3226491.0000,3247962.0000,3260395.0000,3210170.0000,3186034.0000,3209198.0000,3221351.0000,3218235.0000,3249524.0000,3252409.0000,3225499.0000,3221451.0000,3252480.0000,3236912.0000,3223605.0000,3202956.0000,3225790.0000,3282587.0000,3248082.0000,3220730.0000,3222663.0000,3192787.0000,3226210.0000,3186404.0000,3233895.0000,3216151.0000,3226140.0000,3219598.0000,3228655.0000,3279852.0000,3247821.0000,3199039.0000,3241139.0000,3254534.0000,3220019.0000,3261628.0000,3247551.0000,3215660.0000,3231060.0000,3252621.0000,3160957.0000,3279071.0000,3209529.0000,3233284.0000,3168561.0000,3214719.0000,3164223.0000,3226681.0000,3241669.0000,3229847.0000,3242812.0000,3206183.0000,3200081.0000,3208657.0000,3200211.0000,3252310.0000,3214498.0000,3068151.0000,2951420.0000,2943255.0000,2972420.0000,2971437.0000,3116323.0000,3206232.0000,3219076.0000,3254214.0000,3222263.0000,3227743.0000,3218967.0000,3192186.0000,3192146.0000,3224788.0000,3248392.0000,3225709.0000,3195022.0000,3251027.0000,3252910.0000,3275885.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,104879200,1105717.0700,1082524.0200,1146463.8000,153380.1899,99867.2562,217716.2736,"1010983.0000,996265.0000,1795670.0000,1736468.0000,1743301.0000,1746617.0000,1767777.0000,1242402.0000,1073692.0000,1069003.0000,1077851.0000,1075195.0000,1071899.0000,1074234.0000,1045188.0000,1075776.0000,1079503.0000,1062651.0000,1093119.0000,1075065.0000,1074033.0000,1070687.0000,1075867.0000,1072600.0000,1077639.0000,1072389.0000,1070205.0000,1076537.0000,1076848.0000,1073602.0000,1073031.0000,1076137.0000,1100052.0000,1076728.0000,1074304.0000,1073111.0000,1074995.0000,1101214.0000,1074073.0000,1070546.0000,1074364.0000,1074794.0000,1103519.0000,1103549.0000,1106875.0000,1070346.0000,1133125.0000,1068132.0000,1103579.0000,1073302.0000,1081668.0000,1072270.0000,1101335.0000,1078120.0000,1072470.0000,1070456.0000,1104430.0000,1104500.0000,1073693.0000,1074784.0000,1072359.0000,1105963.0000,1101625.0000,1075676.0000,1087548.0000,1059896.0000,1103869.0000,1102136.0000,1069394.0000,1072931.0000,1073202.0000,1107817.0000,1106445.0000,1100122.0000,1073572.0000,1077309.0000,1076567.0000,1073281.0000,1067690.0000,1103749.0000,1107206.0000,1068181.0000,1078140.0000,1101725.0000,1043566.0000,1028116.0000,1015592.0000,1027224.0000,1003580.0000,993350.0000,1025000.0000,1007817.0000,1034447.0000,1028817.0000,1031613.0000,1032785.0000,1032475.0000,1003910.0000,1002648.0000,1001796.0000" -benchmark independent task pattern with N tasks - 100,task 
generation,100,1,1049336100,8159441.3300,7635743.8900,8666370.0700,2611192.0084,2415266.0077,2863448.7848,"6435852.0000,11535623.0000,11711145.0000,11234062.0000,10819967.0000,9631436.0000,9951051.0000,10110945.0000,10348373.0000,9284077.0000,10437382.0000,10861126.0000,11819651.0000,10185846.0000,8535368.0000,4591968.0000,8619599.0000,10921840.0000,10126324.0000,10580504.0000,10800771.0000,8209311.0000,4452093.0000,4623548.0000,4570527.0000,4379586.0000,4237746.0000,5330708.0000,10621451.0000,9519243.0000,8037525.0000,8355018.0000,10723865.0000,10445057.0000,4565999.0000,4267563.0000,4348707.0000,4169207.0000,4099615.0000,4348377.0000,4541021.0000,4514902.0000,4499773.0000,8450028.0000,10800621.0000,10338325.0000,9123923.0000,7324395.0000,4691897.0000,4928997.0000,4976957.0000,4930250.0000,5256547.0000,6873700.0000,9006191.0000,8586586.0000,9345173.0000,11123944.0000,11841834.0000,11626124.0000,9896317.0000,10130971.0000,9953997.0000,10974670.0000,7580791.0000,7816748.0000,9888973.0000,10095355.0000,10553993.0000,10571497.0000,9341866.0000,9302343.0000,5266917.0000,4077182.0000,4449067.0000,4914048.0000,5049554.0000,5243142.0000,5081605.0000,9924661.0000,10544205.0000,11196862.0000,10434176.0000,10596474.0000,9054422.0000,9389577.0000,8025593.0000,14259223.0000,11579736.0000,6487450.0000,6376068.0000,6822333.0000,6658924.0000,8225863.0000,11408372.0000,6941509.0000,7125949.0000,6822524.0000,6897956.0000,6353976.0000" -benchmark independent task pattern with N tasks - 1000,task 
generation,100,1,8426660500,98678958.8600,95369883.8600,101925541.6800,16792148.2728,15127669.4791,18814610.1473,"98464277.0000,90567376.0000,121025892.0000,113586989.0000,92018555.0000,128525641.0000,105138091.0000,63383443.0000,85788021.0000,123572597.0000,116401221.0000,100634559.0000,113231113.0000,80901633.0000,104523637.0000,116165815.0000,119238836.0000,113768612.0000,64116422.0000,86265254.0000,90765611.0000,91269155.0000,112201884.0000,116474169.0000,107987470.0000,108074695.0000,77429976.0000,89059499.0000,119057822.0000,125116723.0000,93625790.0000,107365811.0000,107083266.0000,109915302.0000,77361877.0000,128073323.0000,112494138.0000,99540255.0000,112994957.0000,124194375.0000,102298221.0000,73772425.0000,102092451.0000,98461051.0000,135103704.0000,95554775.0000,88214939.0000,104499340.0000,101792933.0000,89432055.0000,112720035.0000,112672135.0000,115512686.0000,113123691.0000,77717119.0000,99427211.0000,82717333.0000,100650109.0000,93472390.0000,90703614.0000,109364698.0000,97716090.0000,124301558.0000,130270275.0000,96710084.0000,104988417.0000,101929963.0000,101305260.0000,74244100.0000,89609682.0000,109889834.0000,85255702.0000,70878332.0000,82191546.0000,71408769.0000,86507363.0000,71638743.0000,87247396.0000,85774395.0000,67236933.0000,74843686.0000,81840662.0000,109253509.0000,91927913.0000,84336341.0000,79412732.0000,69874270.0000,110240949.0000,116677443.0000,99625176.0000,103991488.0000,114315158.0000,82132705.0000,104422385.0000,110025130.0000,113819929.0000,95611472.0000,99245417.0000,70535002.0000,69953080.0000" -benchmark independent task pattern with N tasks - 5000,task 
generation,100,1,45086896900,495209589.0700,484110338.1200,505047482.2300,53087816.4758,45903290.8579,62834501.9968,"442441010.0000,439615666.0000,578152666.0000,537309345.0000,544672366.0000,531824003.0000,597407918.0000,506548297.0000,545330683.0000,534011457.0000,545082593.0000,504466582.0000,540536279.0000,546874938.0000,509991329.0000,515736684.0000,493347156.0000,465830853.0000,554634319.0000,509396572.0000,552485488.0000,569226697.0000,489357637.0000,466449105.0000,508191148.0000,477132906.0000,519673192.0000,494293518.0000,534323839.0000,514478019.0000,521977548.0000,488830828.0000,521262223.0000,414610864.0000,440888227.0000,469195959.0000,477943893.0000,463448911.0000,479679050.0000,408365190.0000,405166009.0000,403025695.0000,516710229.0000,459873728.0000,359151961.0000,515465070.0000,472400531.0000,483403256.0000,425173966.0000,539501289.0000,534320713.0000,563835682.0000,459170275.0000,530830661.0000,558354888.0000,480281481.0000,474284239.0000,468002588.0000,458714320.0000,479508357.0000,489667173.0000,558806353.0000,504630693.0000,498828290.0000,489400308.0000,526340865.0000,570949710.0000,435677556.0000,545334561.0000,542310120.0000,505745254.0000,509504417.0000,520719575.0000,486469004.0000,518794468.0000,554947873.0000,514749033.0000,541153529.0000,541845040.0000,581328995.0000,522405830.0000,491583403.0000,524897080.0000,486575085.0000,528054813.0000,502433871.0000,519765027.0000,506038821.0000,569032979.0000,482634309.0000,515573254.0000,373651512.0000,332806307.0000,415428391.0000,427229691.0000,410402080.0000,337655515.0000,416852641.0000,441344021.0000,437191564.0000" +benchmark intrusive graph dependency handling with N nodes - 1,creating 
nodes,100,5272,2108800,4.4784,4.4773,4.4793,0.0050,0.0042,0.0058,"4.4808,4.4808,4.4827,4.4808,4.4808,4.4674,4.4808,4.4808,4.4827,4.4827,4.4695,4.4808,4.4807,4.4808,4.4808,4.4676,4.4789,4.4827,4.4808,4.4808,4.4789,4.4674,4.4808,4.4789,4.4808,4.4808,4.4695,4.4808,4.4808,4.4808,4.4788,4.4808,4.4676,4.4789,4.4808,4.4808,4.4808,4.4674,4.4808,4.4808,4.4808,4.4808,4.4808,4.4676,4.4788,4.4808,4.4808,4.4827,4.4695,4.4808,4.4808,4.4825,4.4807,4.4676,4.4808,4.4808,4.4808,4.4808,4.4808,4.4674,4.4808,4.4789,4.4808,4.4789,4.4676,4.4807,4.4807,4.4789,4.4808,4.4789,4.4676,4.4808,4.4808,4.4807,4.4808,4.4676,4.4808,4.4808,4.4808,4.4808,4.4693,4.4808,4.4808,4.4808,4.4808,4.4808,4.4676,4.4788,4.4808,4.4808,4.4808,4.4676,4.4808,4.4827,4.4825,4.4807,4.4789,4.4676,4.4808,4.4827" +benchmark intrusive graph dependency handling with N nodes - 1,creating and adding dependencies,100,1096,2301600,22.3743,21.9507,24.0881,3.9108,0.5425,9.2481,"21.5447,21.5356,21.8193,22.7418,21.4900,21.5447,22.5684,22.9608,22.3768,21.7911,22.9708,22.9434,21.6907,22.4863,22.2391,21.5356,21.5356,21.4991,21.5356,21.5447,21.5347,21.5255,21.3714,21.2974,21.4717,21.5356,21.5356,21.4982,21.5356,22.6323,22.1022,21.5812,22.6788,21.4982,22.9526,21.7737,22.4033,22.9617,22.9708,22.7600,21.4900,21.5447,22.5411,22.1752,21.9927,22.9617,22.9516,21.4891,21.5447,22.3495,22.3850,21.5447,21.4900,22.9252,21.9097,22.0931,21.5447,21.5447,21.4900,22.4307,22.6049,22.9699,22.3403,21.8367,22.9708,60.8896,22.6049,21.5356,22.9434,21.6998,21.5356,21.5347,21.5164,21.3714,21.5356,21.5438,22.2664,22.4133,21.4343,21.4900,21.6542,21.9745,21.5356,21.8823,22.8522,21.4900,21.5347,22.4498,22.9617,22.4224,21.4343,22.9434,21.6177,21.5447,21.5447,21.5447,21.5447,21.3796,21.6086,22.1022" +benchmark intrusive graph dependency handling with N nodes - 1,adding and removing 
dependencies,100,1487,2230500,15.4954,15.4755,15.5752,0.1913,0.0110,0.4191,"15.4815,15.4553,15.5091,15.4553,15.4748,15.4822,15.4755,15.4889,15.4546,15.4755,15.4822,15.4882,15.4882,15.4620,15.4822,15.4815,15.4755,15.4822,15.4553,15.4882,15.4822,15.4889,15.4553,15.4815,15.4755,15.4822,15.4822,15.4546,15.4889,15.4755,15.4815,15.4882,15.4620,15.4822,15.4748,15.4755,15.4755,15.4553,15.4882,15.4822,15.4889,15.4822,15.4546,15.4822,15.4822,15.4822,15.4815,15.4553,15.4822,15.4815,15.4815,15.4822,15.4553,15.4815,15.4822,15.4822,15.4822,15.4546,15.4822,15.4889,17.3954,15.4822,15.4546,15.4889,15.4822,15.4889,15.4748,15.4546,15.4822,15.4822,15.4815,15.4822,15.4553,15.4755,15.4815,15.4822,15.4687,15.4681,15.4889,15.4822,15.4755,15.4546,15.4822,15.4755,15.4822,15.4815,15.4553,15.4755,15.4748,15.4748,15.4822,15.4553,15.4748,15.4822,15.4822,15.4822,15.4546,15.4755,15.4822,15.4748" +benchmark intrusive graph dependency handling with N nodes - 1,checking for dependencies,100,17108,1710800,1.3995,1.3746,1.5226,0.2451,0.0031,0.5850,"1.3761,1.3761,1.3785,1.3767,1.3685,1.3761,1.3767,1.3761,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3756,1.3685,1.3761,1.3890,1.3761,1.3767,1.3685,1.3761,1.3761,1.3761,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3761,1.3685,1.3767,1.3761,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3767,1.3703,1.3732,1.3761,1.3761,1.3761,1.3761,1.3680,1.3761,1.3767,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3767,1.3685,1.3761,1.3761,1.3761,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3761,1.3685,3.8381,1.3685,1.3756,1.3761,1.3756,1.3761,1.3761,1.3685,1.3761,1.3761,1.3767,1.3761,1.3685,1.3761,1.3761,1.3761,1.3761,1.3685,1.3761,1.3761,1.3761,1.3761,1.3761,1.3685,1.3761,1.3767,1.3761,1.3761,1.3685,1.3761,1.3761" +benchmark intrusive graph dependency handling with N nodes - 10,creating 
nodes,100,642,2311200,39.8933,39.1918,42.7303,6.7745,0.3370,16.1419,"38.9953,38.9813,39.0436,39.0109,39.5109,39.5576,39.0109,39.0109,38.8879,39.0109,39.0125,38.9953,38.9969,39.0109,39.0125,38.8863,39.9330,39.9486,39.9486,39.4019,39.0109,39.0125,38.9953,38.8879,39.9159,39.1526,38.9969,38.9798,39.0125,39.0109,38.9953,38.8707,39.8380,39.9486,39.3551,38.9953,39.3084,39.9642,39.9486,39.8536,39.0748,38.9969,38.9953,39.4486,39.6526,39.0109,39.0125,38.8863,38.9969,39.6667,39.4330,39.3863,39.6838,38.9953,38.8879,38.9953,38.9969,39.0109,38.9969,39.3240,39.7601,39.0125,38.8863,39.0125,38.9969,38.9953,38.9969,39.2617,39.8224,38.8879,39.9315,39.1371,38.9969,38.9953,39.3551,39.7305,38.9953,38.8879,39.0109,38.9969,38.9953,39.4330,39.6526,39.1526,39.9470,39.8551,39.9486,107.2087,38.9969,38.8863,38.9969,38.9953,39.0125,39.0109,39.0109,38.9969,38.8707,38.9969,39.0109,39.0125" +benchmark intrusive graph dependency handling with N nodes - 10,creating and adding dependencies,100,100,2390000,238.2878,237.9079,239.7975,3.4892,0.5131,8.2484,"237.2300,237.6300,239.5300,237.9400,238.2300,238.4300,238.7300,237.8300,238.0300,238.0300,238.3300,238.5300,238.1300,237.7300,238.8400,237.9300,238.3300,237.9300,238.2400,238.4300,238.7300,237.1300,238.5400,237.3300,237.8300,237.9300,238.9300,237.9300,237.7300,237.7300,238.2300,238.4400,238.4300,237.5300,238.3300,238.4400,238.3300,238.5300,238.2400,237.6300,238.4300,237.8300,237.5300,237.8400,238.5300,237.7300,237.8300,238.3300,238.5300,237.3300,237.7300,238.1300,237.3300,238.1400,238.5300,237.9300,238.0300,237.6400,238.2300,237.8300,237.9300,238.2300,238.0300,238.4300,238.0300,236.5300,238.0300,238.1300,238.6300,238.5300,239.0400,237.2300,237.5300,238.3300,237.5300,237.8300,272.5900,238.5400,237.9300,237.9300,236.6300,237.3300,237.8300,237.9300,238.3300,236.6300,236.8300,237.6300,237.5300,237.9300,237.6400,237.5300,237.5300,237.5300,237.7300,237.2300,237.4300,236.9300,237.5300,236.7200" +benchmark intrusive graph dependency handling with N nodes - 
10,adding and removing dependencies,100,110,2387000,222.4669,222.0579,224.0120,3.7138,0.5859,8.7439,"219.0364,219.4000,222.1273,221.0455,222.3091,222.1273,222.4909,222.4909,222.3091,221.6727,222.1364,222.1273,222.4091,222.4000,222.4091,222.2182,221.9455,222.4909,222.5818,222.4000,222.2182,222.4000,221.7636,222.2273,222.4000,222.3182,222.3091,222.3182,222.3091,221.4909,258.8364,221.7636,222.4000,222.3182,222.4909,222.3182,221.4909,222.2182,222.4909,222.5818,222.4000,222.4000,221.6727,222.4909,222.4091,222.3091,222.3182,222.0364,222.2273,221.6727,222.4909,222.5000,222.4000,222.2273,222.1273,221.6818,222.4000,222.5000,222.4000,222.1364,222.3091,222.2182,221.9455,222.4909,222.5909,222.5818,222.4091,222.3091,221.8636,222.2182,222.3182,222.4000,222.4091,222.4000,221.7636,222.3091,222.4000,222.3182,222.3091,222.1364,222.3091,221.6818,222.3091,222.1273,222.2182,222.0364,222.2273,221.6727,222.5000,222.0364,222.3091,222.4909,222.4000,222.4909,221.9455,222.4091,222.6727,220.9545,219.3000,219.3091" +benchmark intrusive graph dependency handling with N nodes - 10,checking for dependencies,100,999,2297700,23.7324,23.6891,23.8981,0.3896,0.0713,0.9138,"23.6667,23.7568,23.8068,23.7167,23.7568,23.6667,23.5265,23.7467,23.5756,23.6156,23.7467,23.6767,23.7067,23.7267,23.6667,23.7467,23.5556,23.5866,23.7568,23.7467,23.7568,23.7568,23.7467,23.6667,23.5165,23.7568,23.7467,23.7467,23.7568,23.6667,23.7568,23.7568,23.7467,23.6657,23.7568,23.5556,23.5966,23.7467,23.7568,23.6767,23.7067,23.7367,23.6667,23.5265,23.7568,23.5966,23.6266,23.6466,23.6857,23.7057,23.7367,23.6657,23.5255,23.5566,23.7467,23.6166,23.7467,23.7568,23.7568,23.7467,23.6667,23.5165,27.5375,23.7568,23.7568,23.6466,23.7067,23.6667,23.5165,23.5756,23.7467,23.6757,23.7578,23.6557,23.7578,23.7568,23.7568,23.7067,23.6567,23.5255,23.5766,23.7467,23.6867,23.7467,23.6667,23.7467,23.7467,23.7568,23.7467,23.6667,23.7467,23.7467,23.7568,23.7467,23.7568,23.6667,23.7568,23.7467,23.7467,23.7467" +benchmark intrusive graph dependency 
handling with N nodes - 100,creating nodes,100,61,2403400,405.5267,396.7264,448.6303,86.1946,2.1604,205.5986,"399.0820,398.9180,401.7049,397.7705,397.4426,396.9508,390.2131,397.6066,393.6721,395.6230,396.1148,392.8525,390.2131,396.7869,393.8361,394.1475,398.9180,397.9344,397.1148,398.2623,398.5902,395.3115,395.8033,392.6721,397.1148,392.6721,394.0000,394.9836,397.7705,397.9344,395.9672,397.7705,393.6557,392.3607,393.0000,392.8361,390.0492,391.8689,392.3443,397.2787,397.4426,392.5246,397.1148,398.7541,398.2623,398.2623,398.4426,398.2623,396.2951,396.6230,397.1148,394.9672,1262.8525,398.2623,399.4098,399.4098,397.6066,397.6230,398.4262,397.2787,398.4262,397.4426,398.0984,398.4262,397.6066,396.4590,398.2787,398.4262,398.2623,397.7705,396.6230,397.9344,396.6230,397.9344,398.4262,397.6066,396.7869,396.7869,398.9344,398.5902,399.0820,399.0820,399.2623,398.5902,396.1311,397.1148,397.6066,398.0984,398.4262,399.2459,396.1148,398.2787,398.2623,398.0984,397.9344,397.9344,396.6230,399.2459,398.7705,399.2459" +benchmark intrusive graph dependency handling with N nodes - 100,creating and adding 
dependencies,100,6,2458800,4128.7817,4112.5250,4144.0950,80.3531,62.1256,112.0881,"4149.1667,4135.8333,4167.6667,3943.8333,3947.1667,3937.1667,3947.1667,3933.6667,3928.8333,3935.5000,3927.1667,3953.8333,3930.5000,3932.1667,3943.8333,4524.8333,4167.6667,4165.8333,4164.3333,4161.0000,4149.1667,4162.5000,4159.1667,4147.5000,4172.6667,4155.8333,4144.1667,4130.8333,4159.1667,4142.5000,4137.5000,4149.1667,4149.1667,4137.5000,4147.6667,4145.8333,4150.8333,4162.5000,4152.5000,4161.0000,4164.1667,4157.6667,4145.8333,4152.5000,4176.0000,4164.1667,4157.5000,4142.5000,4140.8333,4139.1667,4152.5000,4145.8333,4161.0000,4162.5000,4167.6667,4149.1667,4144.1667,4162.6667,4162.5000,4154.3333,4164.1667,4154.3333,4162.5000,4127.5000,4160.8333,4154.1667,4145.8333,4130.8333,4144.3333,4142.5000,4147.5000,4137.5000,4152.6667,4147.5000,4165.8333,4154.1667,4155.8333,4151.0000,4139.1667,4155.8333,4162.6667,4127.5000,4134.1667,4142.5000,4147.6667,4140.8333,4145.8333,4144.1667,4156.0000,4154.1667,4150.8333,4127.5000,4124.1667,4155.8333,4165.8333,4140.8333,4147.5000,4137.5000,4147.5000,4114.1667" +benchmark intrusive graph dependency handling with N nodes - 100,adding and removing 
dependencies,100,6,2766600,4722.7617,4715.8850,4746.3617,58.5303,17.7714,133.5005,"4740.3333,4732.0000,4783.8333,4745.3333,4713.5000,4688.6667,4733.6667,4717.0000,4715.1667,4730.3333,4732.0000,4732.0000,4723.6667,4722.0000,4735.3333,4737.0000,4720.1667,4732.0000,4737.0000,4728.6667,4723.6667,4723.6667,4735.3333,4730.3333,4696.8333,4682.0000,4675.1667,4708.6667,4718.5000,4720.3333,4725.3333,4728.6667,4732.0000,4725.3333,4703.5000,4707.0000,4732.0000,4733.6667,4722.0000,4730.1667,4733.5000,4713.6667,4695.3333,4725.3333,4726.8333,4715.3333,4676.8333,4688.5000,4686.8333,4728.6667,4682.0000,4668.5000,4685.1667,4720.3333,4718.6667,4713.5000,5274.6667,4683.5000,4691.8333,4713.6667,4718.6667,4730.3333,4732.0000,4706.8333,4737.0000,4727.0000,4695.3333,4710.1667,4698.6667,4708.6667,4720.1667,4698.5000,4680.1667,4708.6667,4730.3333,4705.3333,4695.1667,4735.3333,4730.3333,4718.6667,4732.0000,4720.1667,4713.6667,4680.1667,4728.5000,4715.3333,4687.0000,4725.1667,4720.3333,4727.0000,4712.0000,4718.6667,4737.0000,4733.6667,4735.1667,4725.1667,4723.6667,4732.0000,4713.6667,4712.0000" +benchmark intrusive graph dependency handling with N nodes - 100,checking for 
dependencies,100,15,2457000,1705.3480,1702.0373,1715.0833,26.7566,9.7273,58.8326,"1705.7333,1706.4667,1947.5333,1650.3333,1699.7333,1695.0667,1701.1333,1700.4000,1702.4667,1703.0667,1699.1333,1648.3333,1705.0667,1695.8000,1709.7333,1706.4667,1704.4667,1703.0667,1706.4667,1706.4667,1708.4000,1705.8000,1702.4000,1707.7333,1707.1333,1704.4000,1707.1333,1654.3333,1704.4667,1710.4000,1703.0667,1708.4667,1711.7333,1707.7333,1711.1333,1651.0000,1707.8000,1706.4000,1700.4667,1709.1333,1705.0667,1707.8000,1711.8000,1704.4000,1705.1333,1703.7333,1704.4667,1699.0667,1707.8000,1703.1333,1705.0667,1707.1333,1707.7333,1706.4000,1704.4667,1704.4000,1699.1333,1707.0667,1705.1333,1706.4667,1707.0667,1706.4667,1705.7333,1707.0667,1695.7333,1704.4667,1704.4667,1712.4000,1707.1333,1703.1333,1702.4000,1705.1333,1705.7333,1702.4667,1701.0667,1704.4667,1707.8000,1706.4000,1711.1333,1706.4667,1705.0667,1699.1333,1707.0667,1705.1333,1703.0667,1704.4000,1707.1333,1703.7333,1703.8000,1709.1333,1700.4000,1708.4667,1704.4000,1706.4667,1709.1333,1705.0667,1705.8000,1706.4000,1699.7333,1703.0667" +benchmark task handling > without access thread,generating and deleting 
tasks,100,1,350050200,3424395.6900,3366224.0600,3476338.0300,281411.9655,255829.8096,298353.1373,"3626422.0000,3630520.0000,3014001.0000,3026935.0000,3010845.0000,3278813.0000,3629478.0000,3630289.0000,3627965.0000,3621572.0000,3621823.0000,3633726.0000,3629398.0000,3633986.0000,3626242.0000,3624328.0000,3631902.0000,3624608.0000,3623005.0000,3622184.0000,3626722.0000,3624598.0000,3620851.0000,3634727.0000,3629848.0000,3631301.0000,3626862.0000,3624649.0000,3625851.0000,3630590.0000,3620159.0000,3622625.0000,3449436.0000,2995525.0000,2995025.0000,3008170.0000,3007839.0000,3014391.0000,3016206.0000,3025914.0000,3000785.0000,3009383.0000,3271729.0000,3635950.0000,3634918.0000,3646210.0000,3648663.0000,3622946.0000,3623486.0000,3620431.0000,3620470.0000,3619128.0000,3618687.0000,3621022.0000,3619158.0000,3623236.0000,3619438.0000,3615039.0000,3621172.0000,3624618.0000,3627384.0000,3630820.0000,3622745.0000,3622605.0000,3624748.0000,3623607.0000,3625801.0000,3615400.0000,3617145.0000,3119290.0000,2996136.0000,3009222.0000,3020163.0000,3025683.0000,3007860.0000,3013179.0000,3016605.0000,3039660.0000,3398981.0000,3646870.0000,3639417.0000,3597757.0000,2996568.0000,3001918.0000,3007809.0000,3001377.0000,3005144.0000,3010305.0000,3002348.0000,3010625.0000,3004123.0000,3010193.0000,3514940.0000,3632713.0000,3631771.0000,3630199.0000,3632122.0000,3630269.0000,3635569.0000,3624698.0000" +benchmark task handling > with access thread,generating and deleting tasks with access 
thread,100,1,780463500,7845350.3700,7786064.9600,7893900.1600,271314.8827,232110.1167,305673.0042,"8022875.0000,7954125.0000,8005842.0000,7968322.0000,8034447.0000,8041651.0000,8022985.0000,7967991.0000,7768513.0000,7308581.0000,7351964.0000,7311536.0000,7341324.0000,7969855.0000,8015120.0000,7954255.0000,7983691.0000,7968762.0000,8030791.0000,7773463.0000,7267222.0000,7268345.0000,7271530.0000,7461361.0000,7976817.0000,8001273.0000,8020891.0000,8063543.0000,7999992.0000,7969584.0000,8011784.0000,7990824.0000,8039978.0000,7980304.0000,7972209.0000,7958133.0000,7998619.0000,7358666.0000,7323520.0000,7527276.0000,8049275.0000,7983591.0000,8025470.0000,8008989.0000,7980365.0000,7993670.0000,8000283.0000,8013748.0000,7955758.0000,8013417.0000,8010232.0000,8001274.0000,7966579.0000,7996385.0000,8012595.0000,7992317.0000,7967901.0000,8017736.0000,8032684.0000,7959845.0000,8030730.0000,8011063.0000,8004801.0000,7988159.0000,7965146.0000,8001855.0000,8006124.0000,8006935.0000,7964325.0000,7955958.0000,7976167.0000,7953103.0000,7969123.0000,7989813.0000,8009660.0000,7554318.0000,7313109.0000,7317278.0000,7245151.0000,7506908.0000,7317748.0000,7287020.0000,7691898.0000,7669284.0000,7307069.0000,7273234.0000,7303972.0000,7572852.0000,8022144.0000,8004520.0000,7988179.0000,8036752.0000,8055647.0000,7971187.0000,7998088.0000,8012596.0000,8020250.0000,7970346.0000,8027685.0000,7994762.0000" +generating large task graphs,soup 
topology,100,1,83036800,846146.1400,845714.0800,846657.7300,2384.2106,2040.6462,2934.1352,"846533.0000,850650.0000,855089.0000,846954.0000,845390.0000,844850.0000,848667.0000,843186.0000,843607.0000,844419.0000,846142.0000,850430.0000,845110.0000,842776.0000,844549.0000,846883.0000,849148.0000,846041.0000,846242.0000,844358.0000,849999.0000,845020.0000,845060.0000,847574.0000,844729.0000,850490.0000,845210.0000,845521.0000,845311.0000,844358.0000,849048.0000,845450.0000,842345.0000,843968.0000,845711.0000,844850.0000,848406.0000,847274.0000,845211.0000,848957.0000,845561.0000,846112.0000,845400.0000,846022.0000,853125.0000,846302.0000,843817.0000,846682.0000,846533.0000,849959.0000,844690.0000,844779.0000,844569.0000,849258.0000,844840.0000,844248.0000,844108.0000,845140.0000,850330.0000,846282.0000,847335.0000,845851.0000,843367.0000,849368.0000,844228.0000,845351.0000,844709.0000,842645.0000,848727.0000,845340.0000,846012.0000,843878.0000,848055.0000,845611.0000,845030.0000,844719.0000,843957.0000,851292.0000,845661.0000,847274.0000,847805.0000,845481.0000,849568.0000,846022.0000,844269.0000,845681.0000,849098.0000,844639.0000,844619.0000,846262.0000,842996.0000,846542.0000,843407.0000,844198.0000,844028.0000,844589.0000,850961.0000,845932.0000,842836.0000,843998.0000" +generating large task graphs,chain 
topology,100,1,3431400,35539.6300,35465.3100,35720.2600,544.4847,162.2019,965.0984,"35415.0000,35365.0000,39172.0000,36347.0000,35956.0000,35556.0000,35726.0000,35655.0000,35526.0000,35615.0000,35475.0000,35535.0000,35295.0000,35416.0000,35455.0000,35645.0000,35395.0000,35435.0000,35325.0000,35515.0000,35395.0000,35415.0000,35465.0000,35365.0000,35455.0000,35455.0000,35435.0000,35305.0000,35576.0000,35565.0000,35325.0000,35535.0000,35335.0000,35636.0000,35345.0000,35505.0000,35385.0000,35615.0000,35375.0000,35516.0000,35465.0000,35425.0000,35405.0000,35495.0000,35435.0000,35496.0000,35595.0000,35455.0000,35585.0000,35315.0000,35495.0000,35455.0000,35495.0000,35385.0000,39213.0000,35696.0000,35515.0000,35566.0000,35395.0000,35605.0000,35406.0000,35505.0000,35305.0000,35626.0000,35435.0000,35485.0000,35546.0000,35525.0000,35686.0000,35475.0000,35415.0000,35425.0000,35636.0000,35405.0000,35646.0000,35475.0000,35495.0000,35506.0000,35395.0000,35606.0000,35305.0000,35495.0000,35375.0000,35395.0000,35335.0000,35525.0000,35505.0000,35325.0000,35184.0000,35325.0000,35165.0000,35375.0000,35255.0000,35265.0000,35185.0000,35495.0000,35304.0000,35335.0000,35285.0000,35214.0000" +generating large task graphs,expanding tree 
topology,100,1,5638600,56693.6600,56497.3900,57305.1700,1574.1809,364.8559,3420.0146,"56025.0000,56044.0000,62917.0000,57527.0000,56986.0000,56505.0000,56876.0000,56625.0000,56425.0000,56265.0000,56215.0000,56355.0000,56485.0000,55774.0000,56505.0000,56225.0000,56014.0000,56806.0000,55894.0000,56195.0000,56645.0000,56295.0000,56375.0000,56475.0000,56365.0000,56185.0000,56415.0000,56385.0000,56405.0000,56495.0000,56485.0000,56105.0000,56415.0000,56946.0000,56896.0000,56325.0000,56164.0000,56085.0000,56064.0000,56826.0000,56365.0000,56686.0000,56465.0000,56826.0000,56335.0000,56805.0000,57818.0000,57407.0000,56626.0000,56665.0000,70642.0000,57367.0000,56485.0000,56245.0000,56485.0000,56265.0000,56716.0000,56655.0000,56736.0000,56756.0000,56345.0000,56435.0000,56515.0000,56615.0000,56675.0000,56275.0000,56365.0000,56505.0000,56115.0000,56515.0000,56315.0000,56385.0000,56405.0000,56545.0000,56736.0000,56725.0000,56445.0000,56525.0000,56665.0000,56435.0000,56335.0000,56525.0000,56415.0000,56235.0000,56415.0000,56345.0000,56254.0000,56526.0000,56014.0000,56245.0000,56585.0000,56636.0000,56675.0000,56165.0000,56545.0000,56285.0000,56415.0000,56936.0000,56115.0000,56795.0000" +generating large task graphs,contracting tree 
topology,100,1,7669000,79067.3100,78930.6200,79340.9000,951.4166,531.1104,1526.5683,"78727.0000,78747.0000,84759.0000,80701.0000,79539.0000,79689.0000,79789.0000,78827.0000,78918.0000,79188.0000,79018.0000,78858.0000,79048.0000,79278.0000,78637.0000,78316.0000,79008.0000,79038.0000,79098.0000,78547.0000,78487.0000,78908.0000,78797.0000,78798.0000,78797.0000,78717.0000,78807.0000,78918.0000,78436.0000,78637.0000,78737.0000,78888.0000,78657.0000,78437.0000,84438.0000,79128.0000,78647.0000,79459.0000,78747.0000,79078.0000,78948.0000,78848.0000,78978.0000,78767.0000,78697.0000,78838.0000,78677.0000,78357.0000,78627.0000,78847.0000,79328.0000,79148.0000,78838.0000,78947.0000,78988.0000,79028.0000,78968.0000,79218.0000,78928.0000,79148.0000,79038.0000,78417.0000,79318.0000,78868.0000,78888.0000,79148.0000,78827.0000,79279.0000,79138.0000,78947.0000,79248.0000,79268.0000,78938.0000,79058.0000,78557.0000,79619.0000,78567.0000,78367.0000,78747.0000,78487.0000,79348.0000,78927.0000,78497.0000,78938.0000,78627.0000,82915.0000,79199.0000,79478.0000,78967.0000,78657.0000,78877.0000,78788.0000,78537.0000,78867.0000,78477.0000,78487.0000,78807.0000,78958.0000,78337.0000,78948.0000" +generating large task graphs,wave_sim 
topology,100,1,32208600,286648.2500,285753.9300,288615.3200,6443.5735,2868.1434,11011.1655,"328180.0000,327219.0000,305257.0000,288975.0000,284407.0000,284657.0000,284748.0000,284538.0000,284307.0000,284838.0000,284677.0000,284167.0000,284567.0000,285639.0000,284938.0000,292412.0000,285328.0000,285338.0000,285309.0000,285510.0000,285258.0000,285259.0000,285269.0000,285479.0000,285690.0000,285349.0000,285860.0000,285649.0000,285529.0000,290248.0000,285279.0000,285840.0000,285168.0000,285059.0000,284998.0000,285079.0000,285479.0000,285559.0000,284468.0000,284918.0000,285299.0000,284908.0000,285199.0000,291370.0000,285109.0000,285840.0000,285369.0000,285599.0000,285068.0000,285378.0000,285689.0000,285129.0000,284978.0000,285309.0000,285158.0000,285149.0000,284888.0000,289377.0000,285299.0000,284978.0000,284628.0000,284888.0000,284918.0000,284477.0000,284888.0000,284717.0000,285079.0000,285359.0000,284758.0000,285199.0000,284908.0000,289076.0000,286190.0000,284728.0000,284708.0000,284778.0000,284768.0000,284798.0000,285218.0000,285299.0000,284498.0000,284787.0000,285088.0000,284828.0000,284577.0000,289066.0000,284909.0000,285288.0000,285199.0000,286421.0000,285068.0000,285018.0000,285038.0000,285199.0000,285469.0000,285319.0000,284918.0000,285209.0000,286441.0000,297562.0000" +generating large task graphs,jacobi 
topology,100,1,10294900,102852.1200,102713.3100,103122.4100,950.7190,562.3940,1506.0236,"102523.0000,102913.0000,108744.0000,104186.0000,102863.0000,103143.0000,103214.0000,102843.0000,103013.0000,103103.0000,103344.0000,102973.0000,106150.0000,102763.0000,102552.0000,102893.0000,102974.0000,102953.0000,102562.0000,102712.0000,102603.0000,102442.0000,102433.0000,102562.0000,102653.0000,102522.0000,102463.0000,102352.0000,102853.0000,102282.0000,102522.0000,102733.0000,102513.0000,102572.0000,102543.0000,102512.0000,102623.0000,102663.0000,102662.0000,102533.0000,102432.0000,102633.0000,102683.0000,103073.0000,102302.0000,102713.0000,102733.0000,102592.0000,102662.0000,102442.0000,102622.0000,107492.0000,102693.0000,102612.0000,103133.0000,102763.0000,102482.0000,102463.0000,102763.0000,102662.0000,102883.0000,102643.0000,103094.0000,102903.0000,102422.0000,102583.0000,102472.0000,102693.0000,102182.0000,102592.0000,102723.0000,102663.0000,102582.0000,102372.0000,102413.0000,102602.0000,102853.0000,102653.0000,102713.0000,102843.0000,102502.0000,102202.0000,102432.0000,102122.0000,102623.0000,102592.0000,102543.0000,102753.0000,102873.0000,102322.0000,106400.0000,102963.0000,102613.0000,102963.0000,102612.0000,102642.0000,102623.0000,102372.0000,102272.0000,102492.0000" +generating large command graphs for N nodes - 1,soup 
topology,100,1,156761900,1576571.7600,1575743.4900,1577458.4900,4371.0883,3872.5709,4987.8308,"1578610.0000,1577819.0000,1585483.0000,1580484.0000,1574342.0000,1578691.0000,1568912.0000,1573280.0000,1587907.0000,1575424.0000,1579201.0000,1572238.0000,1569774.0000,1584381.0000,1573481.0000,1579402.0000,1574553.0000,1577458.0000,1583409.0000,1572518.0000,1577428.0000,1574322.0000,1571597.0000,1584221.0000,1574913.0000,1574913.0000,1585814.0000,1575394.0000,1585293.0000,1574101.0000,1573290.0000,1580073.0000,1572178.0000,1576196.0000,1571527.0000,1576045.0000,1578300.0000,1575384.0000,1580254.0000,1572739.0000,1574293.0000,1583118.0000,1571526.0000,1583168.0000,1577168.0000,1571387.0000,1578110.0000,1570555.0000,1577858.0000,1578239.0000,1571026.0000,1582327.0000,1571186.0000,1579292.0000,1576666.0000,1577277.0000,1581907.0000,1573881.0000,1573130.0000,1579432.0000,1571366.0000,1581285.0000,1573089.0000,1572359.0000,1580063.0000,1574433.0000,1576636.0000,1569913.0000,1571617.0000,1578380.0000,1571638.0000,1578360.0000,1572328.0000,1577528.0000,1581516.0000,1573621.0000,1575124.0000,1578680.0000,1576316.0000,1578710.0000,1569193.0000,1579422.0000,1571256.0000,1574243.0000,1587226.0000,1581936.0000,1577819.0000,1574933.0000,1572419.0000,1574001.0000,1575074.0000,1574603.0000,1575234.0000,1574222.0000,1580033.0000,1574763.0000,1574092.0000,1581475.0000,1573731.0000,1585644.0000" +generating large command graphs for N nodes - 1,chain 
topology,100,1,12411000,110380.0100,109099.7100,112123.5600,7549.1847,6082.6636,9387.5605,"107261.0000,106941.0000,137048.0000,126839.0000,125907.0000,125546.0000,126167.0000,125606.0000,125967.0000,126147.0000,125465.0000,125616.0000,125886.0000,125506.0000,125636.0000,125776.0000,138640.0000,117550.0000,107421.0000,107612.0000,107381.0000,107242.0000,107221.0000,107392.0000,107311.0000,107061.0000,107502.0000,107452.0000,106891.0000,107221.0000,107622.0000,107622.0000,106880.0000,106991.0000,107211.0000,107321.0000,107312.0000,119454.0000,107592.0000,107121.0000,106770.0000,107091.0000,106841.0000,106881.0000,106710.0000,106981.0000,106820.0000,106920.0000,106701.0000,106780.0000,106861.0000,106530.0000,106740.0000,107191.0000,106880.0000,106840.0000,106791.0000,106911.0000,106760.0000,107031.0000,106941.0000,106921.0000,107221.0000,106941.0000,106851.0000,106971.0000,107091.0000,107141.0000,107402.0000,106860.0000,106881.0000,106931.0000,106620.0000,106861.0000,113633.0000,107181.0000,106981.0000,106750.0000,106751.0000,107121.0000,106750.0000,106519.0000,106940.0000,106941.0000,106921.0000,107091.0000,106781.0000,107041.0000,106460.0000,106971.0000,107101.0000,106871.0000,106880.0000,107051.0000,107121.0000,106800.0000,106941.0000,107332.0000,106690.0000,107382.0000" +generating large command graphs for N nodes - 1,expanding tree 
topology,100,1,17958800,162664.5900,162227.5700,163597.9400,3103.8713,1749.7006,5588.5897,"161985.0000,162025.0000,174189.0000,164570.0000,162716.0000,162697.0000,162867.0000,163137.0000,162496.0000,161975.0000,161253.0000,160983.0000,161734.0000,162526.0000,162987.0000,162105.0000,161965.0000,162997.0000,162376.0000,162446.0000,162786.0000,162116.0000,162235.0000,174298.0000,162396.0000,161474.0000,161173.0000,162246.0000,162256.0000,161674.0000,161885.0000,162376.0000,162276.0000,162136.0000,162385.0000,161534.0000,162376.0000,161965.0000,161835.0000,162396.0000,161975.0000,162617.0000,162225.0000,162757.0000,162787.0000,161995.0000,162376.0000,185330.0000,163177.0000,162757.0000,161143.0000,161544.0000,160943.0000,161114.0000,161724.0000,162086.0000,162035.0000,161304.0000,162566.0000,161274.0000,161144.0000,162045.0000,161855.0000,161995.0000,161655.0000,162225.0000,162305.0000,162146.0000,161835.0000,162466.0000,161745.0000,161674.0000,170742.0000,161975.0000,161885.0000,161324.0000,160883.0000,160252.0000,161534.0000,161444.0000,162085.0000,161214.0000,162496.0000,162276.0000,162426.0000,161304.0000,162015.0000,161845.0000,160953.0000,161715.0000,161835.0000,161664.0000,161374.0000,162025.0000,161414.0000,161985.0000,169039.0000,163077.0000,162867.0000,162145.0000" +generating large command graphs for N nodes - 1,contracting tree 
topology,100,1,21186000,200797.1500,197999.9500,203430.3400,13819.3155,13066.6311,14302.8188,"211319.0000,211709.0000,193004.0000,183797.0000,183085.0000,183516.0000,184217.0000,182924.0000,184158.0000,182995.0000,183966.0000,183806.0000,183346.0000,184197.0000,183346.0000,183416.0000,183836.0000,185280.0000,182343.0000,183135.0000,182764.0000,189838.0000,183265.0000,185720.0000,183767.0000,182814.0000,183596.0000,184107.0000,183816.0000,184077.0000,183275.0000,183596.0000,183455.0000,183657.0000,184187.0000,182795.0000,184357.0000,184138.0000,183355.0000,184248.0000,184668.0000,184528.0000,195348.0000,212520.0000,212470.0000,212090.0000,211188.0000,211910.0000,211810.0000,211980.0000,211038.0000,212100.0000,212661.0000,211850.0000,210938.0000,212661.0000,211850.0000,211319.0000,211699.0000,211890.0000,211469.0000,219394.0000,212140.0000,213173.0000,212100.0000,212130.0000,212832.0000,213402.0000,211529.0000,211589.0000,211348.0000,212290.0000,211890.0000,212090.0000,211850.0000,211890.0000,211980.0000,212370.0000,212330.0000,212080.0000,217050.0000,212140.0000,211680.0000,211719.0000,211349.0000,211940.0000,211108.0000,211329.0000,211068.0000,211679.0000,211459.0000,212010.0000,212551.0000,211770.0000,211138.0000,211980.0000,212100.0000,212410.0000,211610.0000,217009.0000" +generating large command graphs for N nodes - 1,wave_sim 
topology,100,1,103162900,1046738.3400,1032017.7600,1059516.4900,70294.7697,61690.4332,76601.3010,"927206.0000,933357.0000,1127465.0000,1092870.0000,1087489.0000,1095766.0000,1086838.0000,1085405.0000,1085756.0000,1091307.0000,1089924.0000,1087179.0000,1086567.0000,1090606.0000,1085656.0000,1084143.0000,1086247.0000,1093441.0000,1083983.0000,1087098.0000,1095395.0000,1086107.0000,1086557.0000,1086868.0000,1095084.0000,1085806.0000,1087670.0000,1085816.0000,1092799.0000,1086197.0000,1083903.0000,1090675.0000,1083512.0000,1085055.0000,1087530.0000,1090405.0000,1085596.0000,1086727.0000,1091667.0000,1093340.0000,1085455.0000,1088011.0000,1092228.0000,1086217.0000,1085716.0000,1083362.0000,1090755.0000,1081979.0000,1085596.0000,1084253.0000,1098330.0000,1085104.0000,1083902.0000,1091247.0000,1086868.0000,1083312.0000,1087779.0000,1093470.0000,1085065.0000,1087379.0000,1088331.0000,1093060.0000,1085867.0000,1082550.0000,1090054.0000,1089854.0000,1085816.0000,1085486.0000,1090525.0000,1086017.0000,1088652.0000,1086918.0000,1091427.0000,1087800.0000,1086367.0000,1073303.0000,925983.0000,924721.0000,927486.0000,925643.0000,937785.0000,924010.0000,924130.0000,927436.0000,933798.0000,927647.0000,925442.0000,924751.0000,930942.0000,926825.0000,926956.0000,926534.0000,927005.0000,935271.0000,928128.0000,925502.0000,933527.0000,936293.0000,928869.0000,926083.0000" +generating large command graphs for N nodes - 1,jacobi 
topology,100,1,33679000,351385.1900,350580.9200,354041.3300,6675.6195,2277.1035,14855.5262,"349481.0000,368326.0000,362916.0000,351274.0000,351204.0000,350553.0000,350452.0000,350984.0000,350141.0000,349280.0000,354981.0000,350352.0000,349952.0000,350292.0000,348058.0000,349530.0000,349139.0000,349610.0000,349821.0000,351604.0000,351655.0000,350783.0000,353909.0000,349060.0000,349501.0000,348599.0000,348028.0000,349320.0000,349361.0000,349420.0000,349661.0000,350603.0000,349411.0000,353558.0000,349791.0000,349260.0000,350402.0000,350683.0000,350372.0000,349851.0000,348729.0000,350031.0000,349471.0000,349180.0000,354289.0000,350151.0000,349620.0000,349550.0000,349351.0000,350552.0000,350101.0000,350834.0000,349951.0000,349501.0000,349120.0000,350162.0000,357546.0000,352336.0000,350022.0000,349801.0000,350142.0000,350923.0000,350803.0000,350392.0000,349710.0000,350252.0000,349661.0000,355482.0000,350983.0000,351444.0000,349620.0000,351395.0000,351043.0000,351625.0000,349931.0000,351875.0000,350602.0000,350803.0000,350383.0000,357055.0000,349220.0000,349851.0000,350483.0000,350513.0000,349921.0000,350352.0000,349490.0000,350843.0000,350292.0000,350332.0000,412290.0000,350633.0000,349671.0000,349881.0000,349601.0000,349771.0000,349020.0000,350111.0000,350101.0000,350543.0000" +generating large command graphs for N nodes - 4,soup 
topology,100,1,197995600,1922108.2200,1894065.5600,1946387.9100,130770.1451,112089.7308,145193.9379,"1682717.0000,1677288.0000,2008004.0000,1995641.0000,1992485.0000,1990792.0000,1993396.0000,1990502.0000,1991954.0000,1992755.0000,1995671.0000,1994999.0000,1997464.0000,1992625.0000,1996463.0000,1991753.0000,1997033.0000,1996893.0000,1993257.0000,1994879.0000,1998767.0000,1990951.0000,1986032.0000,1996813.0000,1992905.0000,1992664.0000,1985651.0000,1993217.0000,1987546.0000,1996582.0000,1997524.0000,2003956.0000,1992725.0000,1996262.0000,1991002.0000,2000490.0000,1989349.0000,1995300.0000,1987245.0000,1998506.0000,1987897.0000,1993847.0000,1988357.0000,1994058.0000,1985292.0000,1995550.0000,1988787.0000,1999899.0000,1991242.0000,1994990.0000,1988327.0000,1991312.0000,1991222.0000,1994418.0000,1994920.0000,2000841.0000,1989649.0000,1990361.0000,1987836.0000,1996062.0000,1988838.0000,1993988.0000,1993747.0000,2003345.0000,1991623.0000,1993397.0000,1989379.0000,1992925.0000,1985321.0000,1995531.0000,1989800.0000,1996302.0000,1997414.0000,1997934.0000,1722923.0000,1698307.0000,1673891.0000,1685954.0000,1675253.0000,1674231.0000,1681957.0000,1676105.0000,1681866.0000,1675935.0000,1680413.0000,1674312.0000,1678469.0000,1681946.0000,1677227.0000,1688378.0000,1675554.0000,1676195.0000,1989298.0000,1987415.0000,1995942.0000,1995160.0000,1995581.0000,1712033.0000,1682938.0000,1679070.0000" +generating large command graphs for N nodes - 4,chain 
topology,100,1,36155700,358519.1400,353676.6500,363228.4500,24324.9355,23372.3698,25076.2766,"330765.0000,329031.0000,356804.0000,346194.0000,331928.0000,329572.0000,330083.0000,331918.0000,330845.0000,330875.0000,330194.0000,332228.0000,330635.0000,329713.0000,331416.0000,338670.0000,331597.0000,331757.0000,330625.0000,330855.0000,330104.0000,330274.0000,329572.0000,330755.0000,330604.0000,331276.0000,330775.0000,355682.0000,380589.0000,380599.0000,379067.0000,379818.0000,378646.0000,379507.0000,380279.0000,379518.0000,379106.0000,379257.0000,384908.0000,379618.0000,379668.0000,379748.0000,379427.0000,378466.0000,379487.0000,380349.0000,381050.0000,380760.0000,385078.0000,378736.0000,379498.0000,379898.0000,379487.0000,379027.0000,379237.0000,380579.0000,379558.0000,378826.0000,379067.0000,385178.0000,380880.0000,379538.0000,380138.0000,379057.0000,378576.0000,379808.0000,379407.0000,380109.0000,377774.0000,384116.0000,378105.0000,379608.0000,379147.0000,379046.0000,379447.0000,378556.0000,379016.0000,378917.0000,378576.0000,379377.0000,385138.0000,392422.0000,377875.0000,329823.0000,329212.0000,329884.0000,329322.0000,329633.0000,328871.0000,329202.0000,329753.0000,338229.0000,329342.0000,328541.0000,328972.0000,329152.0000,329052.0000,329473.0000,329953.0000,330084.0000" +generating large command graphs for N nodes - 4,expanding tree 
topology,100,1,45620600,459281.2400,457757.8800,465585.7600,13581.6730,2598.5344,31655.2850,"456734.0000,590578.0000,480238.0000,460180.0000,458517.0000,457926.0000,456323.0000,456193.0000,457115.0000,455331.0000,456954.0000,464529.0000,457135.0000,459048.0000,455762.0000,457695.0000,455853.0000,457605.0000,458297.0000,461694.0000,456152.0000,458116.0000,458307.0000,458257.0000,459649.0000,455452.0000,458617.0000,457716.0000,462966.0000,458257.0000,456593.0000,458517.0000,457986.0000,458738.0000,455060.0000,458938.0000,456583.0000,462846.0000,457335.0000,458647.0000,459859.0000,458066.0000,457566.0000,458026.0000,456654.0000,463517.0000,457094.0000,456523.0000,456253.0000,454941.0000,456493.0000,458227.0000,458096.0000,456894.0000,461843.0000,456754.0000,457265.0000,458016.0000,457395.0000,457265.0000,457035.0000,457365.0000,456584.0000,463496.0000,459348.0000,455952.0000,456193.0000,457826.0000,458417.0000,456874.0000,453728.0000,455031.0000,458908.0000,454129.0000,454169.0000,455662.0000,455622.0000,456243.0000,453568.0000,455982.0000,460201.0000,455822.0000,455973.0000,454559.0000,454910.0000,455452.0000,457625.0000,460340.0000,457977.0000,466252.0000,458607.0000,457686.0000,459439.0000,456754.0000,455511.0000,455942.0000,456974.0000,457686.0000,462946.0000,460150.0000" +generating large command graphs for N nodes - 4,contracting tree 
topology,100,1,46793100,490426.6300,489904.5000,491113.2700,3034.5143,2454.0985,4157.0551,"489185.0000,494936.0000,499535.0000,495718.0000,492091.0000,490208.0000,489245.0000,490348.0000,489416.0000,488524.0000,491911.0000,495217.0000,490087.0000,489155.0000,488914.0000,486461.0000,489816.0000,489576.0000,488794.0000,495638.0000,488975.0000,488484.0000,488544.0000,489356.0000,488344.0000,488955.0000,487011.0000,494415.0000,488113.0000,486601.0000,488594.0000,488705.0000,487332.0000,488694.0000,487331.0000,496499.0000,488754.0000,488604.0000,488344.0000,486851.0000,486740.0000,487922.0000,488303.0000,488644.0000,493884.0000,489015.0000,487763.0000,488244.0000,488815.0000,488484.0000,486530.0000,488053.0000,495377.0000,489716.0000,489826.0000,489386.0000,489786.0000,486891.0000,489345.0000,490568.0000,504985.0000,491870.0000,487482.0000,488293.0000,487502.0000,487662.0000,490197.0000,492281.0000,498884.0000,491600.0000,489907.0000,490498.0000,491079.0000,491259.0000,489375.0000,491570.0000,496169.0000,489777.0000,489105.0000,489316.0000,490417.0000,491249.0000,489806.0000,490417.0000,495728.0000,491991.0000,490859.0000,490297.0000,490969.0000,491741.0000,491590.0000,493063.0000,495006.0000,491490.0000,492201.0000,489967.0000,489566.0000,490177.0000,490888.0000,489857.0000" +generating large command graphs for N nodes - 4,wave_sim 
topology,100,1,217517400,2210197.9800,2182189.3400,2232619.3400,127731.5556,107349.2384,145579.3422,"2269530.0000,2271122.0000,2320457.0000,2293736.0000,2269079.0000,2272465.0000,2267415.0000,2144243.0000,1943633.0000,1946768.0000,1946357.0000,2097313.0000,2279418.0000,2267396.0000,2270933.0000,2268257.0000,2270823.0000,2267366.0000,2277986.0000,2268608.0000,2277184.0000,2275582.0000,2269310.0000,2275751.0000,2275060.0000,2279258.0000,2267396.0000,2283867.0000,2273027.0000,2263558.0000,2273377.0000,2267386.0000,2277635.0000,2269540.0000,2274840.0000,2276182.0000,2271895.0000,2272917.0000,2271713.0000,2277424.0000,2264220.0000,2274199.0000,2313293.0000,2270091.0000,2281613.0000,2269520.0000,2275732.0000,2270642.0000,2278136.0000,2276203.0000,2268257.0000,2275661.0000,2274379.0000,2274079.0000,2269039.0000,2278427.0000,2276102.0000,2265713.0000,2278927.0000,2271113.0000,2277675.0000,2275431.0000,2282084.0000,2278286.0000,2276504.0000,2281603.0000,2278156.0000,2279649.0000,2269510.0000,2275230.0000,2272275.0000,2277405.0000,2283406.0000,2269140.0000,2277274.0000,2272145.0000,2276563.0000,2270221.0000,2272877.0000,2271344.0000,2059872.0000,1951337.0000,1946868.0000,1952599.0000,1936909.0000,1945025.0000,1937631.0000,1953440.0000,1940716.0000,1952569.0000,1948351.0000,1945606.0000,1948180.0000,1946468.0000,1946868.0000,1938723.0000,2228351.0000,2279959.0000,2272766.0000,2279629.0000" +generating large command graphs for N nodes - 4,jacobi 
topology,100,1,78492300,825648.0500,825111.5700,826360.9000,3132.0315,2520.3063,4593.0553,"825292.0000,824781.0000,842224.0000,825362.0000,825042.0000,826284.0000,824260.0000,831454.0000,823770.0000,824391.0000,823710.0000,824571.0000,833268.0000,823689.0000,824762.0000,825863.0000,826214.0000,832345.0000,824541.0000,825332.0000,825393.0000,822777.0000,830121.0000,826184.0000,826184.0000,822818.0000,829831.0000,825293.0000,824701.0000,824361.0000,824310.0000,829961.0000,824731.0000,826524.0000,825873.0000,825243.0000,833057.0000,824230.0000,824370.0000,824291.0000,822968.0000,830713.0000,823308.0000,824120.0000,823038.0000,824531.0000,828509.0000,824842.0000,829019.0000,823109.0000,825202.0000,828940.0000,824401.0000,826424.0000,824922.0000,829250.0000,823649.0000,822066.0000,823138.0000,821986.0000,829881.0000,824320.0000,823890.0000,822447.0000,822217.0000,829059.0000,824601.0000,823248.0000,822167.0000,826605.0000,829590.0000,822888.0000,821455.0000,822908.0000,824992.0000,830523.0000,821806.0000,823779.0000,824892.0000,826745.0000,829982.0000,824030.0000,825042.0000,823519.0000,824641.0000,827247.0000,822988.0000,823720.0000,825152.0000,828098.0000,826224.0000,825814.0000,824100.0000,823269.0000,830402.0000,824381.0000,823619.0000,822457.0000,823228.0000,827336.0000" +generating large command graphs for N nodes - 16,soup 
topology,100,1,247659500,2520158.2000,2477950.3400,2556478.8700,199963.7002,177669.6421,216372.0965,"2198525.0000,2192223.0000,2667715.0000,2656503.0000,2641225.0000,2651434.0000,2651684.0000,2224715.0000,2206791.0000,2196512.0000,2198425.0000,2189127.0000,2519453.0000,2649740.0000,2643379.0000,2655051.0000,2654459.0000,2642627.0000,2647717.0000,2646805.0000,2641424.0000,2646955.0000,2654420.0000,2648187.0000,2411488.0000,2193225.0000,2200950.0000,2190600.0000,2646885.0000,2657084.0000,2644270.0000,2650422.0000,2652776.0000,2647456.0000,2650693.0000,2652606.0000,2643829.0000,2652535.0000,2657675.0000,2651484.0000,2649800.0000,2654168.0000,2643048.0000,2345273.0000,2200989.0000,2189017.0000,2197433.0000,2188306.0000,2188416.0000,2176814.0000,2183146.0000,2184899.0000,2194668.0000,2185671.0000,2637477.0000,2637246.0000,2636736.0000,2648739.0000,2648818.0000,2641073.0000,2644340.0000,2645733.0000,2642777.0000,2642446.0000,2648498.0000,2645363.0000,2650822.0000,2648699.0000,2641085.0000,2663146.0000,2645161.0000,2636315.0000,2650642.0000,2654360.0000,2641815.0000,2642547.0000,2641936.0000,2636575.0000,2638639.0000,2641906.0000,2632468.0000,2645803.0000,2645623.0000,2642937.0000,2640974.0000,2639451.0000,2638169.0000,2737116.0000,2650192.0000,2645072.0000,2648909.0000,2641595.0000,2641675.0000,2342338.0000,2188967.0000,2198455.0000,2192995.0000,2191773.0000,2197583.0000,2190109.0000" +generating large command graphs for N nodes - 16,chain 
topology,100,1,120311800,1230936.9400,1219318.8700,1238793.8400,48272.0817,35683.1840,61234.0439,"1247603.0000,1244978.0000,1266038.0000,1248624.0000,1250768.0000,1253864.0000,1246291.0000,1245078.0000,1253924.0000,1237503.0000,1082941.0000,1078042.0000,1095605.0000,1076578.0000,1078592.0000,1085817.0000,1081408.0000,1078161.0000,1078793.0000,1145169.0000,1235159.0000,1243806.0000,1250779.0000,1244307.0000,1243766.0000,1245659.0000,1247002.0000,1243235.0000,1241671.0000,1249647.0000,1245499.0000,1243205.0000,1250899.0000,1242393.0000,1245019.0000,1247152.0000,1245769.0000,1242704.0000,1269114.0000,1245679.0000,1248755.0000,1247773.0000,1258273.0000,1243335.0000,1244707.0000,1260638.0000,1247022.0000,1244487.0000,1256680.0000,1246301.0000,1245399.0000,1251470.0000,1245960.0000,1244056.0000,1250088.0000,1244467.0000,1244968.0000,1243615.0000,1249877.0000,1243304.0000,1243795.0000,1249105.0000,1241261.0000,1242984.0000,1254386.0000,1242353.0000,1249306.0000,1247613.0000,1244287.0000,1239307.0000,1248524.0000,1242994.0000,1245408.0000,1242783.0000,1249266.0000,1242874.0000,1245258.0000,1249045.0000,1242243.0000,1242734.0000,1250237.0000,1241270.0000,1244066.0000,1249556.0000,1245249.0000,1244898.0000,1244978.0000,1248395.0000,1243685.0000,1240650.0000,1251571.0000,1243695.0000,1247082.0000,1251710.0000,1244547.0000,1245058.0000,1252622.0000,1243706.0000,1245148.0000,1250629.0000" +generating large command graphs for N nodes - 16,expanding tree 
topology,100,1,107483100,1090513.9300,1075230.8400,1103154.6800,70598.1934,60601.3064,78725.7188,"1128276.0000,1132514.0000,1175256.0000,1128677.0000,1132625.0000,1128427.0000,1136262.0000,1128006.0000,1125552.0000,1129118.0000,1123638.0000,1124409.0000,1125812.0000,1132284.0000,1126583.0000,1127064.0000,1133607.0000,1126423.0000,1126253.0000,1121443.0000,1131552.0000,1126343.0000,1125842.0000,1128146.0000,1135681.0000,1128076.0000,1127104.0000,1136222.0000,1127004.0000,1123838.0000,1126644.0000,1130521.0000,1129359.0000,1124519.0000,1132756.0000,1128778.0000,1128727.0000,1127986.0000,1133476.0000,1122375.0000,1124159.0000,1135811.0000,1125321.0000,1124039.0000,1126253.0000,1138125.0000,1127405.0000,1130270.0000,1132054.0000,1125922.0000,1125361.0000,1125942.0000,1134288.0000,1125241.0000,1125331.0000,1129118.0000,1121675.0000,1127375.0000,1123748.0000,1127005.0000,1129739.0000,1032746.0000,958054.0000,969696.0000,955409.0000,956651.0000,954378.0000,960378.0000,957513.0000,955079.0000,954697.0000,989022.0000,954317.0000,955710.0000,955198.0000,965468.0000,956491.0000,959437.0000,957233.0000,964466.0000,956090.0000,958895.0000,956040.0000,957093.0000,1116063.0000,1129599.0000,1128527.0000,1131032.0000,1126052.0000,1123708.0000,1127205.0000,1129298.0000,1130250.0000,1124099.0000,1129689.0000,1131493.0000,1127535.0000,1123448.0000,1135741.0000,1126233.0000" +generating large command graphs for N nodes - 16,contracting tree 
topology,100,1,115219900,1205571.5900,1194051.7600,1212683.9000,45443.5744,29509.2994,61981.2116,"1025613.0000,1024981.0000,1280765.0000,1218638.0000,1214741.0000,1221704.0000,1213548.0000,1210633.0000,1211835.0000,1219450.0000,1215823.0000,1217105.0000,1218918.0000,1215061.0000,1215071.0000,1221874.0000,1212166.0000,1217506.0000,1218838.0000,1214360.0000,1215502.0000,1211975.0000,1220151.0000,1211745.0000,1211534.0000,1222545.0000,1213448.0000,1211204.0000,1222746.0000,1214069.0000,1223417.0000,1221263.0000,1225862.0000,1218868.0000,1218658.0000,1223137.0000,1217255.0000,1218087.0000,1220812.0000,1219159.0000,1219509.0000,1223086.0000,1221634.0000,1221553.0000,1220862.0000,1227275.0000,1220552.0000,1214400.0000,1223628.0000,1217335.0000,1213348.0000,1222556.0000,1218307.0000,1218057.0000,1219650.0000,1219199.0000,1217275.0000,1216654.0000,1219219.0000,1211734.0000,1215452.0000,1218658.0000,1214249.0000,1217627.0000,1223096.0000,1215152.0000,1210893.0000,1214149.0000,1221503.0000,1212606.0000,1212366.0000,1220201.0000,1211094.0000,1212436.0000,1218528.0000,1215773.0000,1216173.0000,1213098.0000,1217746.0000,1214600.0000,1213128.0000,1220582.0000,1210202.0000,1212607.0000,1217295.0000,1214150.0000,1211885.0000,1217866.0000,1212306.0000,1211665.0000,1210803.0000,1219139.0000,1209651.0000,1209952.0000,1218798.0000,1140159.0000,1025773.0000,1028006.0000,1052823.0000,1029069.0000" +generating large command graphs for N nodes - 16,wave_sim 
topology,100,1,409248800,4149140.3000,4083101.4200,4204186.6800,306017.9250,262076.3335,342254.3572,"4313804.0000,4307393.0000,4409637.0000,4323102.0000,4316980.0000,4328592.0000,4316991.0000,4319265.0000,4320918.0000,4315979.0000,4329263.0000,4326950.0000,4325076.0000,4307373.0000,4309938.0000,4318623.0000,4314045.0000,4324304.0000,4323242.0000,4331759.0000,4314947.0000,4302413.0000,4308214.0000,4328142.0000,4315158.0000,4318914.0000,4312642.0000,4308505.0000,4315718.0000,4320126.0000,4307192.0000,4323974.0000,4316871.0000,4359080.0000,3901544.0000,3558773.0000,3535489.0000,3523547.0000,3540960.0000,3685734.0000,4311872.0000,4318313.0000,4322331.0000,4326078.0000,4331668.0000,4325637.0000,4330305.0000,3919376.0000,3570185.0000,3620791.0000,3536291.0000,3542603.0000,3743483.0000,4325567.0000,3560026.0000,3561448.0000,3563633.0000,3559515.0000,3567781.0000,3561449.0000,3558322.0000,3564965.0000,3869723.0000,4325567.0000,4310598.0000,4309807.0000,4295159.0000,4315037.0000,4307923.0000,4314717.0000,4321539.0000,4317421.0000,4314165.0000,4314236.0000,4319125.0000,4299497.0000,4318573.0000,4308985.0000,4318514.0000,4306060.0000,4318123.0000,4325557.0000,4332019.0000,4324736.0000,4306531.0000,4326599.0000,4314797.0000,4310378.0000,3563944.0000,3555597.0000,3587608.0000,4310870.0000,4281223.0000,4288797.0000,4304658.0000,4290671.0000,4319525.0000,4319105.0000,4318444.0000,4325386.0000" +generating large command graphs for N nodes - 16,jacobi 
topology,100,1,235678300,2322835.4800,2288527.8300,2351656.3000,161050.6694,139636.9025,178493.5345,"2419935.0000,2074240.0000,2430315.0000,2412221.0000,2414965.0000,2405427.0000,2419164.0000,2418021.0000,2413081.0000,2420405.0000,2415166.0000,2412492.0000,2415056.0000,2406639.0000,2452857.0000,2415767.0000,2411810.0000,2416689.0000,2406329.0000,2416709.0000,2096281.0000,2034033.0000,2015779.0000,2027571.0000,2017442.0000,2399777.0000,2416939.0000,2409876.0000,2416398.0000,2411800.0000,2413753.0000,2406660.0000,2406399.0000,2414024.0000,2417861.0000,2408894.0000,2416739.0000,2405727.0000,2419334.0000,2422841.0000,2413943.0000,2411189.0000,2057838.0000,2034144.0000,2018194.0000,2231348.0000,2192955.0000,2028242.0000,2016050.0000,2024585.0000,2017713.0000,2055864.0000,2020247.0000,2026449.0000,2016220.0000,2021280.0000,2015138.0000,2091242.0000,2021419.0000,2029304.0000,2017392.0000,2398474.0000,2417430.0000,2409065.0000,2416148.0000,2406079.0000,2421117.0000,2413172.0000,2405398.0000,2432719.0000,2411700.0000,2416859.0000,2419755.0000,2411008.0000,2413773.0000,2413002.0000,2406509.0000,2412310.0000,2410758.0000,2418562.0000,2413743.0000,2415466.0000,2414826.0000,2415537.0000,2411278.0000,2420876.0000,2416068.0000,2435995.0000,2414935.0000,2409805.0000,2411830.0000,2416468.0000,2412922.0000,2403745.0000,2415927.0000,2416869.0000,2412210.0000,2413152.0000,2408884.0000,2413002.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,soup 
topology,100,1,156585200,1600797.1700,1581270.7700,1614211.6400,81568.5703,59845.4867,103352.8066,"1627974.0000,1625168.0000,1368823.0000,1347983.0000,1344988.0000,1381016.0000,1621471.0000,1623165.0000,1635829.0000,1624898.0000,1629928.0000,1626140.0000,1627001.0000,1632382.0000,1622193.0000,1630860.0000,1623555.0000,1620289.0000,1422415.0000,1342182.0000,1337884.0000,1347502.0000,1340179.0000,1343595.0000,1606092.0000,1620840.0000,1629967.0000,1625308.0000,1624508.0000,1629787.0000,1627794.0000,1631511.0000,1626962.0000,1628835.0000,1623987.0000,1623005.0000,1627573.0000,1625289.0000,1630288.0000,1628314.0000,1631901.0000,1629557.0000,1629296.0000,1631190.0000,1626721.0000,1623736.0000,1627583.0000,1630789.0000,1636260.0000,1630248.0000,1623175.0000,1637652.0000,1625439.0000,1630508.0000,1624396.0000,1625839.0000,1633064.0000,1622604.0000,1630589.0000,1627954.0000,1630008.0000,1624778.0000,1627333.0000,1632633.0000,1626320.0000,1625610.0000,1628955.0000,1626260.0000,1629206.0000,1624988.0000,1625329.0000,1624838.0000,1624066.0000,1638924.0000,1628445.0000,1627623.0000,1626321.0000,1627453.0000,1632822.0000,1630909.0000,1634546.0000,1623816.0000,1635198.0000,1633174.0000,1619648.0000,1627583.0000,1624988.0000,1629777.0000,1631150.0000,1630559.0000,1645668.0000,1627412.0000,1628104.0000,1624627.0000,1621912.0000,1633173.0000,1627803.0000,1628135.0000,1626881.0000,1626761.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,chain 
topology,100,1,12233400,119607.8700,117634.9400,121561.3300,10040.2603,9746.3603,10502.2742,"108544.0000,108514.0000,136527.0000,129264.0000,128982.0000,128592.0000,128632.0000,128292.0000,127700.0000,128592.0000,128672.0000,128502.0000,128532.0000,128271.0000,128241.0000,128252.0000,128161.0000,128010.0000,128050.0000,128381.0000,128261.0000,132850.0000,128352.0000,128351.0000,128151.0000,128040.0000,127710.0000,128191.0000,128171.0000,127930.0000,127900.0000,128732.0000,128041.0000,127981.0000,128081.0000,128492.0000,127940.0000,128272.0000,127900.0000,128041.0000,128332.0000,127760.0000,128492.0000,127720.0000,128181.0000,128051.0000,128392.0000,128191.0000,128251.0000,127911.0000,128211.0000,128351.0000,132269.0000,128792.0000,135104.0000,130986.0000,109015.0000,108925.0000,108233.0000,108554.0000,108464.0000,108914.0000,108965.0000,108774.0000,108633.0000,108383.0000,108534.0000,108634.0000,108173.0000,108815.0000,108513.0000,108814.0000,108384.0000,108754.0000,108484.0000,108073.0000,108654.0000,108874.0000,108524.0000,108664.0000,108494.0000,108654.0000,108965.0000,108263.0000,108404.0000,108604.0000,108524.0000,108984.0000,120266.0000,109405.0000,108574.0000,108694.0000,108674.0000,108904.0000,108935.0000,108874.0000,108714.0000,108333.0000,108183.0000,108493.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,18103900,187228.9400,186309.5400,190739.2100,8079.3859,1789.7585,18734.7650,"185460.0000,185510.0000,200298.0000,188685.0000,187122.0000,187082.0000,186652.0000,188125.0000,187043.0000,187473.0000,187393.0000,187614.0000,192132.0000,186913.0000,186692.0000,186832.0000,187203.0000,186161.0000,186001.0000,185991.0000,186361.0000,186291.0000,186381.0000,186472.0000,186501.0000,186160.0000,186481.0000,187464.0000,186030.0000,186151.0000,186802.0000,187564.0000,186601.0000,191691.0000,187263.0000,185921.0000,186442.0000,185590.0000,186251.0000,186191.0000,185921.0000,186812.0000,185720.0000,186010.0000,186081.0000,185620.0000,185911.0000,185640.0000,186001.0000,186131.0000,185269.0000,184638.0000,184307.0000,186111.0000,185200.0000,192483.0000,185940.0000,185590.0000,185670.0000,184578.0000,184107.0000,184948.0000,184337.0000,184989.0000,184929.0000,185219.0000,185219.0000,185529.0000,184989.0000,185019.0000,185340.0000,184528.0000,185239.0000,186331.0000,185519.0000,185179.0000,264900.0000,186732.0000,186752.0000,185891.0000,186000.0000,184568.0000,185429.0000,186472.0000,186421.0000,185018.0000,186511.0000,185931.0000,186181.0000,187724.0000,186412.0000,185429.0000,186562.0000,185941.0000,185790.0000,185230.0000,186511.0000,194016.0000,185300.0000,185129.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,21380100,215863.9000,215616.3500,216309.1600,1647.0126,1078.2100,2807.9950,"215487.0000,215326.0000,227099.0000,215396.0000,215447.0000,215246.0000,215928.0000,213382.0000,214705.0000,215977.0000,214956.0000,216739.0000,215867.0000,215286.0000,215556.0000,214405.0000,214344.0000,221248.0000,215677.0000,214965.0000,216007.0000,214575.0000,216068.0000,215847.0000,215667.0000,215697.0000,216248.0000,217169.0000,215848.0000,214494.0000,214584.0000,215125.0000,216038.0000,216138.0000,215016.0000,216298.0000,220115.0000,216248.0000,216238.0000,215466.0000,215517.0000,216549.0000,215597.0000,214124.0000,215106.0000,215326.0000,215006.0000,214966.0000,215206.0000,216108.0000,215056.0000,214234.0000,215958.0000,216418.0000,219464.0000,216638.0000,215777.0000,215847.0000,215807.0000,215948.0000,215125.0000,215005.0000,214936.0000,215056.0000,214615.0000,215767.0000,214956.0000,215326.0000,215016.0000,214785.0000,215216.0000,214705.0000,215145.0000,220445.0000,215616.0000,214715.0000,215987.0000,215918.0000,216478.0000,214615.0000,215447.0000,215887.0000,215757.0000,215105.0000,215336.0000,216168.0000,215556.0000,215296.0000,216308.0000,217430.0000,215677.0000,218933.0000,215767.0000,215697.0000,216458.0000,215777.0000,215737.0000,216549.0000,215266.0000,216238.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,105271200,1091880.7200,1091036.3400,1093096.9100,5120.8849,3843.6072,8494.6921,"1092649.0000,1096807.0000,1123728.0000,1091376.0000,1085675.0000,1090014.0000,1085195.0000,1082680.0000,1080446.0000,1088351.0000,1085716.0000,1086618.0000,1090886.0000,1094934.0000,1090805.0000,1088401.0000,1096516.0000,1093090.0000,1090425.0000,1088331.0000,1098550.0000,1088982.0000,1092138.0000,1087991.0000,1097118.0000,1088851.0000,1088130.0000,1096436.0000,1089873.0000,1092088.0000,1088461.0000,1097127.0000,1091196.0000,1087199.0000,1087350.0000,1101395.0000,1088341.0000,1091117.0000,1093961.0000,1092640.0000,1093791.0000,1089514.0000,1092298.0000,1089203.0000,1087259.0000,1089703.0000,1092558.0000,1087329.0000,1087529.0000,1098540.0000,1086608.0000,1087320.0000,1088701.0000,1097829.0000,1091406.0000,1092929.0000,1088892.0000,1097408.0000,1092790.0000,1090485.0000,1096767.0000,1088461.0000,1090666.0000,1092178.0000,1097348.0000,1090655.0000,1094523.0000,1092368.0000,1097449.0000,1092248.0000,1093491.0000,1099392.0000,1090635.0000,1092650.0000,1092890.0000,1096025.0000,1089944.0000,1091477.0000,1090445.0000,1095845.0000,1089182.0000,1091507.0000,1101276.0000,1088591.0000,1088201.0000,1087680.0000,1096387.0000,1095033.0000,1091106.0000,1090094.0000,1099733.0000,1085676.0000,1089673.0000,1097127.0000,1089594.0000,1089884.0000,1090695.0000,1095795.0000,1089824.0000,1091878.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,33919500,356168.7300,355884.0900,356588.1900,1737.6520,1341.4886,2522.9915,"356012.0000,356353.0000,365491.0000,357296.0000,356193.0000,354921.0000,355492.0000,354500.0000,354691.0000,360030.0000,355833.0000,356253.0000,357255.0000,357936.0000,355411.0000,356714.0000,355141.0000,356895.0000,356654.0000,355302.0000,360762.0000,355782.0000,355431.0000,355091.0000,355452.0000,354991.0000,355402.0000,356374.0000,355742.0000,355021.0000,355221.0000,359209.0000,356033.0000,355702.0000,356053.0000,355371.0000,356603.0000,355852.0000,355522.0000,355162.0000,355011.0000,355862.0000,359900.0000,355041.0000,354701.0000,355091.0000,354310.0000,355953.0000,357245.0000,356974.0000,354920.0000,354930.0000,355141.0000,359520.0000,356604.0000,356554.0000,355883.0000,355011.0000,355682.0000,354901.0000,355943.0000,354761.0000,355131.0000,355011.0000,355342.0000,360191.0000,355442.0000,355071.0000,355332.0000,355141.0000,355632.0000,356624.0000,355041.0000,354219.0000,355021.0000,354340.0000,360031.0000,354961.0000,355712.0000,354750.0000,355392.0000,355672.0000,356113.0000,356333.0000,356233.0000,356073.0000,356204.0000,360171.0000,356203.0000,356364.0000,356033.0000,355121.0000,355672.0000,357986.0000,356524.0000,355733.0000,356283.0000,354801.0000,360792.0000,355091.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,soup 
topology,100,1,130324800,1244412.0800,1216075.0700,1274212.8700,148228.2521,130030.9900,173227.3651,"1271749.0000,1308148.0000,1308909.0000,1300793.0000,1281066.0000,1301685.0000,1308267.0000,1342212.0000,1330430.0000,1307467.0000,1306004.0000,1298830.0000,1304521.0000,1307486.0000,1311273.0000,1271769.0000,1332935.0000,1339898.0000,1307897.0000,1299180.0000,1313447.0000,1304571.0000,1302848.0000,1312326.0000,1277129.0000,1302818.0000,1308779.0000,1365877.0000,1632914.0000,1623445.0000,1611953.0000,1632542.0000,1617725.0000,1374504.0000,1099722.0000,1247973.0000,1440228.0000,1458552.0000,1438575.0000,1052794.0000,1090004.0000,1044338.0000,1075828.0000,1059556.0000,1182509.0000,1325501.0000,1317235.0000,1225741.0000,1058815.0000,1043116.0000,1062872.0000,1087168.0000,1085074.0000,1039087.0000,1069134.0000,1060258.0000,1028187.0000,1054016.0000,1056270.0000,1061049.0000,1065808.0000,1065868.0000,1060097.0000,1069356.0000,1063424.0000,1044558.0000,1047624.0000,1023959.0000,1040751.0000,1072521.0000,1072661.0000,1314340.0000,1437823.0000,1269124.0000,1028738.0000,1046672.0000,1131803.0000,1308347.0000,1295183.0000,1282228.0000,1281648.0000,1270867.0000,1285404.0000,1301655.0000,1278241.0000,1305152.0000,1251330.0000,1278361.0000,1306845.0000,1275125.0000,1308980.0000,1251310.0000,1358543.0000,1275966.0000,1306424.0000,1310512.0000,1248184.0000,1303339.0000,1341150.0000,1276288.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,chain 
topology,100,1,29941800,297619.3700,289280.6400,306349.2900,43480.4432,40307.7321,47334.3779,"288535.0000,290578.0000,366372.0000,346505.0000,379137.0000,344491.0000,350192.0000,348448.0000,378787.0000,348378.0000,375329.0000,380179.0000,346004.0000,348919.0000,375941.0000,351254.0000,347617.0000,348348.0000,347817.0000,347987.0000,349510.0000,345904.0000,350673.0000,344712.0000,350442.0000,349130.0000,345132.0000,351103.0000,346856.0000,345994.0000,258999.0000,260662.0000,263037.0000,260392.0000,259771.0000,263848.0000,259530.0000,265501.0000,255753.0000,290900.0000,291761.0000,259670.0000,263397.0000,258228.0000,262886.0000,289967.0000,290820.0000,290418.0000,261764.0000,259360.0000,261343.0000,291049.0000,257687.0000,321007.0000,262796.0000,258969.0000,261344.0000,260933.0000,261995.0000,319854.0000,305678.0000,262596.0000,240755.0000,238861.0000,265070.0000,267545.0000,265191.0000,267114.0000,267024.0000,237929.0000,264950.0000,239663.0000,239662.0000,239513.0000,239682.0000,239993.0000,236176.0000,269048.0000,239352.0000,240123.0000,239311.0000,238049.0000,238711.0000,271983.0000,319283.0000,318903.0000,317520.0000,323281.0000,316047.0000,319002.0000,321046.0000,319524.0000,317630.0000,320565.0000,315195.0000,322840.0000,337668.0000,317891.0000,288495.0000,319153.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,36318700,383519.4300,375966.9100,391061.4800,38540.4332,35809.7200,42951.2702,"346174.0000,378986.0000,357516.0000,356294.0000,340583.0000,338280.0000,335374.0000,335464.0000,334141.0000,331567.0000,339502.0000,347066.0000,340824.0000,334362.0000,337818.0000,340935.0000,336746.0000,332098.0000,342868.0000,347527.0000,332638.0000,338831.0000,337217.0000,377283.0000,410255.0000,408944.0000,412760.0000,407030.0000,396199.0000,415165.0000,403152.0000,347707.0000,281281.0000,335253.0000,374659.0000,378425.0000,348529.0000,346986.0000,347667.0000,348228.0000,378425.0000,377924.0000,376612.0000,375490.0000,351054.0000,346264.0000,377483.0000,379156.0000,345883.0000,350252.0000,347556.0000,349420.0000,375881.0000,349531.0000,436215.0000,437898.0000,433450.0000,406760.0000,435093.0000,434702.0000,436676.0000,406399.0000,406108.0000,405838.0000,437577.0000,403684.0000,407580.0000,406829.0000,432889.0000,437387.0000,431587.0000,408873.0000,433911.0000,408091.0000,434892.0000,437728.0000,432969.0000,404114.0000,410516.0000,432598.0000,405727.0000,405106.0000,438559.0000,438520.0000,402020.0000,407601.0000,406930.0000,432959.0000,408883.0000,433579.0000,404454.0000,437517.0000,408713.0000,431055.0000,435063.0000,374848.0000,351324.0000,346805.0000,348529.0000,350122.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,38183100,426523.8600,418776.9800,434020.1500,38940.2952,34451.9951,44262.7851,"434982.0000,498674.0000,420495.0000,398343.0000,398704.0000,399465.0000,410746.0000,401199.0000,482262.0000,466592.0000,461213.0000,497982.0000,491570.0000,521146.0000,465942.0000,464498.0000,493143.0000,463928.0000,491149.0000,499345.0000,461022.0000,465541.0000,465901.0000,461172.0000,463086.0000,374208.0000,376351.0000,379758.0000,348649.0000,376081.0000,372935.0000,373056.0000,369739.0000,371703.0000,373727.0000,372514.0000,370921.0000,372364.0000,347426.0000,373245.0000,370480.0000,345774.0000,344691.0000,347156.0000,373666.0000,370520.0000,457195.0000,425244.0000,423751.0000,455572.0000,424162.0000,427267.0000,424182.0000,424222.0000,426216.0000,460942.0000,432247.0000,424092.0000,427278.0000,426777.0000,424703.0000,420716.0000,428370.0000,428000.0000,455842.0000,449039.0000,452396.0000,451374.0000,425494.0000,422479.0000,428130.0000,426556.0000,425124.0000,424973.0000,480048.0000,426296.0000,422670.0000,457054.0000,422318.0000,423691.0000,427478.0000,427278.0000,422128.0000,445723.0000,463287.0000,466563.0000,401559.0000,406018.0000,434010.0000,435874.0000,434392.0000,438429.0000,436636.0000,433400.0000,465260.0000,435554.0000,438660.0000,432498.0000,434742.0000,435413.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,112145000,1123712.8400,1098681.2500,1162427.2200,156168.3446,109461.5380,209730.1248,"1153454.0000,1136222.0000,1630118.0000,1741930.0000,1740477.0000,1714648.0000,1728514.0000,1602155.0000,1049878.0000,1235640.0000,1247482.0000,1272450.0000,1162632.0000,1222656.0000,947444.0000,1032786.0000,1115612.0000,1131392.0000,1100864.0000,1111444.0000,1110563.0000,1127646.0000,1112256.0000,1120021.0000,1087960.0000,1094803.0000,1117927.0000,1088581.0000,1145839.0000,1127164.0000,1094202.0000,1112617.0000,1114029.0000,1114270.0000,1090625.0000,1111284.0000,1094593.0000,1025492.0000,1006616.0000,1031273.0000,1056761.0000,1025853.0000,1017768.0000,1021364.0000,1030241.0000,1004582.0000,1001826.0000,993310.0000,1004322.0000,1008210.0000,995745.0000,1006096.0000,1023878.0000,1004692.0000,1003290.0000,1020162.0000,1020913.0000,1015162.0000,1028678.0000,1038106.0000,1022035.0000,1014160.0000,1025913.0000,1050910.0000,1013740.0000,1026774.0000,1024360.0000,1031373.0000,1104852.0000,1113368.0000,1133046.0000,1126924.0000,1107848.0000,1128006.0000,1111565.0000,1133767.0000,1101245.0000,1104342.0000,1105864.0000,1115172.0000,1072972.0000,1108029.0000,1115993.0000,1118287.0000,1104020.0000,1121995.0000,1126163.0000,1140039.0000,1113248.0000,1131142.0000,1126183.0000,1160468.0000,1131392.0000,1199802.0000,1126985.0000,1104852.0000,1102538.0000,1108379.0000,1099642.0000,1137373.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,jacobi 
topology,100,1,48835900,525035.9700,512145.9500,542371.9400,75981.3708,59835.4067,91746.3748,"493633.0000,491589.0000,734040.0000,690136.0000,690958.0000,693813.0000,692350.0000,768635.0000,727927.0000,723339.0000,697120.0000,721756.0000,697921.0000,726596.0000,729731.0000,603843.0000,491861.0000,524542.0000,523721.0000,492832.0000,520565.0000,521447.0000,523921.0000,552816.0000,494686.0000,494566.0000,491269.0000,494555.0000,491479.0000,520916.0000,492412.0000,493283.0000,494656.0000,495628.0000,492962.0000,523380.0000,492412.0000,493103.0000,494025.0000,492872.0000,494715.0000,489496.0000,497741.0000,521136.0000,522128.0000,492772.0000,494766.0000,493453.0000,494526.0000,490017.0000,522609.0000,491600.0000,525334.0000,490408.0000,495337.0000,491761.0000,492572.0000,492942.0000,493584.0000,523320.0000,521447.0000,482934.0000,482483.0000,477724.0000,502761.0000,480549.0000,478915.0000,486580.0000,473937.0000,480118.0000,480158.0000,474417.0000,482432.0000,479287.0000,477704.0000,477824.0000,503502.0000,480749.0000,498573.0000,495597.0000,493614.0000,496359.0000,492081.0000,491490.0000,492251.0000,476351.0000,478194.0000,478094.0000,481531.0000,477292.0000,481410.0000,476120.0000,488855.0000,493614.0000,491239.0000,496228.0000,491830.0000,493413.0000,495628.0000,522799.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,260149500,2643829.6400,2639918.9600,2648517.2900,21804.8042,18910.0898,24489.5843,"2632318.0000,2633199.0000,2634642.0000,2632378.0000,2672053.0000,2626336.0000,2635914.0000,2686120.0000,2625364.0000,2635383.0000,2677944.0000,2625514.0000,2637297.0000,2672764.0000,2631506.0000,2634001.0000,2637147.0000,2626957.0000,2693473.0000,2635333.0000,2628641.0000,2674638.0000,2633820.0000,2631496.0000,2632788.0000,2689296.0000,2627739.0000,2630484.0000,2622759.0000,2632849.0000,2680028.0000,2626497.0000,2630914.0000,2670389.0000,2630996.0000,2630484.0000,2628871.0000,2632557.0000,2690528.0000,2632047.0000,2630263.0000,2628560.0000,2677984.0000,2626246.0000,2629822.0000,2670750.0000,2631816.0000,2634462.0000,2630865.0000,2628741.0000,2682502.0000,2636395.0000,2630734.0000,2685838.0000,2639822.0000,2627909.0000,2630625.0000,2691409.0000,2639891.0000,2626767.0000,2639000.0000,2630495.0000,2679226.0000,2634471.0000,2634972.0000,2632298.0000,2676602.0000,2628680.0000,2633669.0000,2686429.0000,2628310.0000,2638459.0000,2641976.0000,2629523.0000,2682312.0000,2632538.0000,2628710.0000,2625304.0000,2688454.0000,2628861.0000,2631496.0000,2671121.0000,2628591.0000,2635413.0000,2623059.0000,2625785.0000,2696309.0000,2629663.0000,2633730.0000,2632347.0000,2629843.0000,2677223.0000,2633730.0000,2636345.0000,2668356.0000,2641345.0000,2627488.0000,2636194.0000,2668506.0000,2631266.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,42385500,423334.2800,421758.4200,426384.3800,10691.5957,6004.8444,16380.1475,"421367.0000,420976.0000,427819.0000,423070.0000,421267.0000,421367.0000,421036.0000,421287.0000,421126.0000,421708.0000,420966.0000,420615.0000,420765.0000,422299.0000,420836.0000,420815.0000,421387.0000,420565.0000,420715.0000,420996.0000,420726.0000,421026.0000,480008.0000,421597.0000,421036.0000,420776.0000,420665.0000,421207.0000,420916.0000,420826.0000,420966.0000,420775.0000,424383.0000,420625.0000,420716.0000,420265.0000,420074.0000,420536.0000,420204.0000,420806.0000,420636.0000,423951.0000,420505.0000,420364.0000,420095.0000,420775.0000,420105.0000,420144.0000,420766.0000,420736.0000,420675.0000,463156.0000,421437.0000,421036.0000,420796.0000,421196.0000,421067.0000,421216.0000,421227.0000,420876.0000,429512.0000,421116.0000,421097.0000,421677.0000,421177.0000,421056.0000,421347.0000,421086.0000,420786.0000,481811.0000,421728.0000,420455.0000,420736.0000,420835.0000,421056.0000,420996.0000,420926.0000,421036.0000,420825.0000,426656.0000,420956.0000,421106.0000,420465.0000,420515.0000,420365.0000,420185.0000,420154.0000,420234.0000,422740.0000,420595.0000,420435.0000,420084.0000,420465.0000,420374.0000,420666.0000,420966.0000,420675.0000,420676.0000,474127.0000,421857.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,49211300,493179.0900,491503.7700,496300.4400,11358.6414,6865.4834,17031.0635,"489326.0000,491009.0000,504705.0000,492061.0000,491440.0000,491239.0000,491420.0000,490808.0000,491450.0000,490398.0000,490107.0000,489696.0000,491209.0000,491289.0000,490738.0000,489536.0000,532167.0000,489406.0000,491119.0000,490468.0000,490257.0000,490037.0000,490227.0000,490618.0000,496389.0000,491299.0000,490478.0000,490518.0000,490087.0000,489657.0000,489345.0000,489336.0000,495267.0000,489215.0000,489525.0000,489435.0000,490268.0000,489576.0000,488183.0000,490327.0000,551584.0000,490598.0000,488995.0000,490177.0000,490307.0000,489966.0000,490238.0000,490477.0000,495747.0000,489306.0000,489125.0000,489777.0000,489506.0000,489606.0000,489426.0000,491149.0000,534711.0000,490759.0000,489716.0000,490237.0000,490327.0000,490848.0000,490397.0000,489867.0000,495056.0000,490328.0000,491069.0000,490588.0000,490027.0000,490207.0000,490097.0000,490157.0000,490778.0000,490007.0000,489506.0000,490859.0000,490518.0000,490357.0000,489956.0000,489536.0000,555400.0000,491780.0000,490668.0000,490247.0000,489676.0000,488794.0000,489045.0000,489004.0000,489636.0000,495247.0000,490808.0000,489676.0000,489976.0000,490488.0000,490498.0000,492371.0000,491239.0000,530483.0000,490688.0000,490658.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,52020800,521654.4200,520041.9100,524654.1700,10967.4263,6701.1436,15805.0636,"519583.0000,518691.0000,524442.0000,520194.0000,519383.0000,519583.0000,518360.0000,519153.0000,518180.0000,518100.0000,517549.0000,518922.0000,569527.0000,520164.0000,518841.0000,518792.0000,519392.0000,519393.0000,520264.0000,518892.0000,523660.0000,519182.0000,518962.0000,518370.0000,518521.0000,519843.0000,519443.0000,519743.0000,568827.0000,518701.0000,518691.0000,517279.0000,519352.0000,518511.0000,518782.0000,519402.0000,518862.0000,518621.0000,518311.0000,518871.0000,519423.0000,519232.0000,518962.0000,520004.0000,519723.0000,519062.0000,518682.0000,518841.0000,517739.0000,518661.0000,519473.0000,562304.0000,517830.0000,517669.0000,517670.0000,518390.0000,517419.0000,519503.0000,521547.0000,519042.0000,518040.0000,519072.0000,518511.0000,519783.0000,518882.0000,518851.0000,521747.0000,519193.0000,518831.0000,519032.0000,518281.0000,517890.0000,518421.0000,518481.0000,576130.0000,519833.0000,518320.0000,518441.0000,518431.0000,518521.0000,518951.0000,523611.0000,518952.0000,519272.0000,518471.0000,517389.0000,518350.0000,518530.0000,518550.0000,567764.0000,519473.0000,519072.0000,519332.0000,520053.0000,519322.0000,518731.0000,519813.0000,526296.0000,519242.0000,519062.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,314673000,3149747.3900,3145619.6300,3154507.7000,22516.0364,19881.0153,24940.9522,"3137084.0000,3135912.0000,3173052.0000,3143698.0000,3138987.0000,3167762.0000,3130912.0000,3132576.0000,3134388.0000,3195584.0000,3142745.0000,3143116.0000,3139859.0000,3181338.0000,3138337.0000,3137916.0000,3191768.0000,3132736.0000,3133397.0000,3171119.0000,3132576.0000,3171359.0000,3137816.0000,3140030.0000,3137254.0000,3194173.0000,3135942.0000,3136202.0000,3175427.0000,3136533.0000,3138117.0000,3131403.0000,3198861.0000,3137395.0000,3136242.0000,3187781.0000,3132585.0000,3131354.0000,3188552.0000,3133136.0000,3131152.0000,3131433.0000,3180256.0000,3138627.0000,3135070.0000,3130673.0000,3170447.0000,3128929.0000,3186588.0000,3137315.0000,3135792.0000,3175346.0000,3135221.0000,3136534.0000,3132906.0000,3174144.0000,3137054.0000,3138587.0000,3180897.0000,3129760.0000,3137916.0000,3140871.0000,3201235.0000,3134950.0000,3134600.0000,3178793.0000,3135350.0000,3135020.0000,3172893.0000,3135250.0000,3141884.0000,3184334.0000,3138196.0000,3135962.0000,3138728.0000,3193661.0000,3132326.0000,3134229.0000,3132445.0000,3166310.0000,3134289.0000,3135591.0000,3191027.0000,3133528.0000,3135631.0000,3138587.0000,3185797.0000,3132816.0000,3138336.0000,3179014.0000,3135201.0000,3134329.0000,3131924.0000,3187290.0000,3135391.0000,3132796.0000,3198471.0000,3131604.0000,3132466.0000,3183943.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,86411600,864048.3800,861659.4700,867886.3300,15214.5337,10620.6407,20230.1127,"858125.0000,858225.0000,870608.0000,860690.0000,860479.0000,912238.0000,860218.0000,857934.0000,857252.0000,865198.0000,858956.0000,856471.0000,857684.0000,857674.0000,923418.0000,859727.0000,859017.0000,858555.0000,865478.0000,859598.0000,857172.0000,858164.0000,857814.0000,859948.0000,859157.0000,858936.0000,857103.0000,858746.0000,918930.0000,859407.0000,858866.0000,858496.0000,862763.0000,858516.0000,858194.0000,859848.0000,858566.0000,903881.0000,859597.0000,858555.0000,858545.0000,858395.0000,863815.0000,859247.0000,856933.0000,857533.0000,859748.0000,858666.0000,857814.0000,857122.0000,858254.0000,924140.0000,857273.0000,858105.0000,857613.0000,863023.0000,859638.0000,859938.0000,859047.0000,859938.0000,864206.0000,860499.0000,860248.0000,859938.0000,859337.0000,916285.0000,861792.0000,860629.0000,860188.0000,862402.0000,858506.0000,858606.0000,859136.0000,858887.0000,865498.0000,859968.0000,860659.0000,859758.0000,858785.0000,903981.0000,859046.0000,859677.0000,859758.0000,859677.0000,859477.0000,860229.0000,859828.0000,859207.0000,914431.0000,859307.0000,857994.0000,858596.0000,864587.0000,859367.0000,859627.0000,859107.0000,859307.0000,863595.0000,859096.0000,858626.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,228342300,2270284.2400,2249318.7800,2289612.4900,103417.8410,92270.1671,117373.6435,"2322992.0000,2320366.0000,2287914.0000,2295398.0000,2322129.0000,2346856.0000,2296701.0000,2292433.0000,2325025.0000,2347959.0000,2329213.0000,2343930.0000,2330996.0000,2347137.0000,2344833.0000,2325276.0000,2352998.0000,2352076.0000,2349140.0000,2363437.0000,2351024.0000,2288997.0000,2317651.0000,2321528.0000,2336056.0000,2338150.0000,2301761.0000,2346666.0000,2290009.0000,2325967.0000,2349281.0000,2322811.0000,2331457.0000,2285730.0000,2321678.0000,2296121.0000,2319485.0000,2355883.0000,2352106.0000,2319104.0000,2491120.0000,2554751.0000,2326899.0000,2383706.0000,2346606.0000,2342368.0000,2313744.0000,2150424.0000,2103435.0000,2130426.0000,2129575.0000,2110188.0000,2085371.0000,2079990.0000,2118644.0000,2097935.0000,2101842.0000,2105720.0000,2139814.0000,2117982.0000,2275381.0000,2324114.0000,2324254.0000,2355944.0000,2319635.0000,2318212.0000,2321498.0000,2323772.0000,2292864.0000,2339372.0000,2343890.0000,2287614.0000,2294888.0000,2323152.0000,2330345.0000,2315377.0000,2320376.0000,2355193.0000,2317601.0000,2317902.0000,2323652.0000,2169009.0000,2136688.0000,2120658.0000,2130346.0000,2099057.0000,2109897.0000,2098786.0000,2106682.0000,2110268.0000,2126639.0000,2102924.0000,2107292.0000,2103585.0000,2080982.0000,2121709.0000,2313723.0000,2319054.0000,2326468.0000,2320807.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,49675400,493717.0200,489160.7100,497871.2400,22202.1104,18599.3935,27340.4747,"524773.0000,492021.0000,445302.0000,425194.0000,417790.0000,434070.0000,459288.0000,553688.0000,503192.0000,479748.0000,480338.0000,478735.0000,477564.0000,505987.0000,506468.0000,529581.0000,508822.0000,505216.0000,477964.0000,477733.0000,507591.0000,489275.0000,468105.0000,491430.0000,493884.0000,464128.0000,495367.0000,492051.0000,494475.0000,492531.0000,549399.0000,495417.0000,490347.0000,495558.0000,493022.0000,494174.0000,493844.0000,522208.0000,495217.0000,493624.0000,523480.0000,518421.0000,494816.0000,494816.0000,491319.0000,496710.0000,521587.0000,519794.0000,495998.0000,491470.0000,494315.0000,521677.0000,493433.0000,493734.0000,463366.0000,524663.0000,491700.0000,523450.0000,465810.0000,490868.0000,494406.0000,463396.0000,492181.0000,467594.0000,520525.0000,492291.0000,495748.0000,492041.0000,464769.0000,523770.0000,521556.0000,522558.0000,465560.0000,490378.0000,497982.0000,490648.0000,494575.0000,521196.0000,493954.0000,490508.0000,494656.0000,464238.0000,495176.0000,463086.0000,491891.0000,495517.0000,492671.0000,493353.0000,522378.0000,493864.0000,491730.0000,493514.0000,494185.0000,522498.0000,494315.0000,493293.0000,492331.0000,495267.0000,490899.0000,494656.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,53716100,558604.2300,553378.3000,563627.5600,26094.3379,24125.4091,28442.6217,"579326.0000,524612.0000,597280.0000,554559.0000,577773.0000,583684.0000,547065.0000,579887.0000,587101.0000,547767.0000,579436.0000,580097.0000,581280.0000,609874.0000,552375.0000,550341.0000,580499.0000,580979.0000,574828.0000,581580.0000,522608.0000,579346.0000,579176.0000,577813.0000,523039.0000,523440.0000,573324.0000,578484.0000,584837.0000,522479.0000,578344.0000,582423.0000,518340.0000,525684.0000,523089.0000,522038.0000,522819.0000,578133.0000,522618.0000,522778.0000,524332.0000,521035.0000,579075.0000,523871.0000,524562.0000,520505.0000,523811.0000,549911.0000,582552.0000,579486.0000,521998.0000,552936.0000,552225.0000,577012.0000,521366.0000,581480.0000,583965.0000,606157.0000,550372.0000,583845.0000,580228.0000,550351.0000,609774.0000,520806.0000,582702.0000,552505.0000,577603.0000,583785.0000,550361.0000,552506.0000,550311.0000,553808.0000,583775.0000,546514.0000,582452.0000,578965.0000,580599.0000,580428.0000,548428.0000,584216.0000,579777.0000,522499.0000,582151.0000,549189.0000,580108.0000,523691.0000,521176.0000,525354.0000,577273.0000,523590.0000,579336.0000,526175.0000,550401.0000,560050.0000,552165.0000,581149.0000,548688.0000,522639.0000,580729.0000,552515.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,54528100,547150.1700,539197.1300,555404.2600,41470.6995,38203.6540,45050.4370,"491650.0000,516266.0000,566201.0000,564859.0000,559008.0000,556233.0000,615665.0000,555712.0000,586961.0000,556743.0000,559860.0000,560781.0000,554429.0000,561573.0000,557655.0000,556874.0000,562805.0000,558456.0000,557745.0000,584717.0000,557334.0000,559689.0000,576490.0000,608631.0000,581029.0000,607911.0000,611147.0000,611046.0000,579878.0000,611737.0000,605115.0000,582983.0000,578355.0000,611046.0000,609684.0000,609483.0000,581680.0000,609132.0000,608231.0000,610055.0000,608862.0000,582382.0000,610625.0000,605797.0000,582142.0000,608852.0000,611337.0000,606878.0000,549861.0000,581380.0000,590057.0000,527177.0000,534632.0000,498412.0000,517338.0000,483895.0000,490147.0000,503131.0000,510727.0000,486430.0000,518531.0000,492381.0000,523480.0000,513521.0000,506017.0000,491079.0000,502701.0000,507661.0000,522028.0000,499585.0000,526015.0000,523360.0000,499245.0000,514373.0000,510235.0000,505737.0000,491429.0000,565079.0000,528680.0000,528189.0000,561282.0000,531396.0000,530393.0000,529021.0000,510085.0000,515726.0000,505216.0000,486760.0000,483674.0000,481240.0000,515886.0000,511177.0000,519513.0000,515666.0000,510986.0000,508913.0000,497261.0000,505366.0000,531475.0000,509744.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,297994100,3017446.2000,2985539.5500,3056027.8300,179435.4107,147857.3027,214251.5070,"2985497.0000,3094494.0000,3485765.0000,3107659.0000,3041533.0000,3147203.0000,3417014.0000,3430721.0000,3441962.0000,3483210.0000,3465066.0000,3476698.0000,3490925.0000,3483150.0000,3530931.0000,3029220.0000,3043386.0000,2993742.0000,3020904.0000,3049779.0000,3018139.0000,3052223.0000,3018789.0000,3015855.0000,3018389.0000,2990366.0000,2993452.0000,2868044.0000,2847435.0000,2802430.0000,2804664.0000,2809393.0000,2815385.0000,2842816.0000,2861321.0000,2804684.0000,2783384.0000,2812368.0000,2786870.0000,2799213.0000,2844900.0000,2817197.0000,2825053.0000,2820915.0000,2796519.0000,2857124.0000,2847716.0000,2822788.0000,2826767.0000,2932206.0000,3027647.0000,3015263.0000,3048006.0000,3078123.0000,3021285.0000,2986388.0000,3015303.0000,3051622.0000,3055770.0000,3067893.0000,2994244.0000,3089484.0000,2995405.0000,3041232.0000,3051923.0000,3041032.0000,3047394.0000,3055710.0000,3076920.0000,3041984.0000,3057193.0000,2979576.0000,3000355.0000,2870790.0000,2861152.0000,2824942.0000,2833709.0000,2810986.0000,2858687.0000,2858095.0000,2959247.0000,3078803.0000,2997359.0000,3072942.0000,3061782.0000,3046643.0000,3058696.0000,3051933.0000,2988743.0000,3004213.0000,3021415.0000,2984004.0000,3082510.0000,3019030.0000,3047705.0000,3017247.0000,3023769.0000,3045180.0000,3028208.0000,3041814.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,86665200,893464.7400,885433.6000,900548.6400,38529.4256,33353.3383,44449.8583,"880147.0000,877371.0000,840381.0000,802249.0000,848076.0000,871580.0000,805846.0000,808711.0000,803952.0000,828178.0000,818489.0000,806005.0000,798912.0000,822698.0000,802088.0000,809482.0000,860729.0000,928788.0000,930222.0000,899794.0000,900104.0000,871319.0000,899253.0000,899283.0000,871991.0000,928608.0000,930422.0000,927045.0000,872411.0000,927556.0000,898672.0000,929149.0000,900295.0000,902629.0000,893371.0000,900405.0000,897089.0000,931524.0000,930992.0000,955469.0000,898992.0000,926394.0000,903351.0000,929400.0000,868685.0000,930993.0000,868424.0000,929920.0000,929850.0000,899223.0000,928648.0000,872000.0000,873473.0000,925453.0000,958284.0000,931543.0000,924531.0000,873223.0000,929369.0000,928468.0000,870699.0000,868845.0000,875017.0000,928218.0000,926825.0000,871950.0000,869265.0000,903020.0000,927577.0000,930732.0000,956381.0000,930031.0000,899393.0000,898321.0000,871701.0000,897880.0000,874937.0000,898021.0000,929440.0000,928718.0000,899243.0000,928057.0000,871090.0000,873003.0000,925913.0000,903200.0000,929200.0000,897820.0000,900305.0000,929470.0000,927687.0000,903811.0000,872192.0000,896938.0000,918259.0000,922386.0000,892189.0000,894664.0000,873464.0000,889103.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,soup 
topology,100,1,195736200,2036080.1200,2015599.3900,2048789.5200,80917.5253,53860.0561,110915.2760,"2057428.0000,2045665.0000,2248129.0000,2067707.0000,2070132.0000,2057518.0000,2060755.0000,2060544.0000,2056987.0000,2047890.0000,2061094.0000,2050324.0000,2056256.0000,2051998.0000,2057317.0000,2054792.0000,2049632.0000,2053670.0000,2053892.0000,2048832.0000,2056907.0000,2052308.0000,2053580.0000,2061516.0000,2050134.0000,2054993.0000,2050104.0000,2059782.0000,2051336.0000,2052338.0000,2051256.0000,2052689.0000,2055594.0000,2070382.0000,2043611.0000,2050836.0000,2049803.0000,2048841.0000,2048892.0000,2058961.0000,2052608.0000,2061505.0000,2052199.0000,2055324.0000,2051566.0000,2054862.0000,2048921.0000,2060133.0000,2048240.0000,2062527.0000,2047079.0000,2051577.0000,2048591.0000,2049312.0000,2052117.0000,2058991.0000,2048250.0000,2053820.0000,1729085.0000,1757519.0000,1710480.0000,1715429.0000,1706382.0000,1738003.0000,2054713.0000,2061024.0000,2051456.0000,2055394.0000,2049603.0000,2058720.0000,2051447.0000,2056315.0000,2047468.0000,2054141.0000,2049914.0000,2057909.0000,2044163.0000,2058089.0000,2054071.0000,2055263.0000,2048250.0000,2054792.0000,2051887.0000,2056155.0000,2054492.0000,2054091.0000,2054042.0000,2052829.0000,2050495.0000,2057558.0000,2052579.0000,2056897.0000,2047890.0000,2055665.0000,2049142.0000,2050705.0000,2049723.0000,2048621.0000,2055674.0000,2047890.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,chain 
topology,100,1,37743000,377710.0800,373182.1600,381332.1200,20580.4385,17179.0603,23508.5332,"335704.0000,335083.0000,398353.0000,387312.0000,392623.0000,387252.0000,387983.0000,387142.0000,386030.0000,387383.0000,386581.0000,386851.0000,386982.0000,387232.0000,392131.0000,387703.0000,387463.0000,386110.0000,386821.0000,387032.0000,387503.0000,387883.0000,388044.0000,387763.0000,391390.0000,388294.0000,387653.0000,387282.0000,386801.0000,386952.0000,386430.0000,389036.0000,386030.0000,386130.0000,387583.0000,399225.0000,387052.0000,388444.0000,387703.0000,387673.0000,387332.0000,387342.0000,386440.0000,387372.0000,387673.0000,391921.0000,388625.0000,387753.0000,387603.0000,386641.0000,385799.0000,387853.0000,386901.0000,387503.0000,386520.0000,393394.0000,386721.0000,387652.0000,388153.0000,386450.0000,386040.0000,386321.0000,387112.0000,387402.0000,386210.0000,386210.0000,392181.0000,385860.0000,387402.0000,385970.0000,385809.0000,386350.0000,384888.0000,386991.0000,386180.0000,387642.0000,391149.0000,386060.0000,387472.0000,386200.0000,386911.0000,386321.0000,370540.0000,334963.0000,333140.0000,333380.0000,335083.0000,343630.0000,334332.0000,334101.0000,335174.0000,334672.0000,333531.0000,333610.0000,335264.0000,334903.0000,335113.0000,335004.0000,335383.0000,346214.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,46327400,435948.4700,430394.0500,441251.9400,27895.6483,27020.6229,28629.4687,"465099.0000,461193.0000,428430.0000,402601.0000,404525.0000,404555.0000,403052.0000,404755.0000,404305.0000,411037.0000,403393.0000,405808.0000,405637.0000,404605.0000,403553.0000,404215.0000,404164.0000,405036.0000,402982.0000,427979.0000,404234.0000,403062.0000,402812.0000,405737.0000,409014.0000,407450.0000,408713.0000,406198.0000,415626.0000,406298.0000,407480.0000,405537.0000,406348.0000,407190.0000,402812.0000,402441.0000,402852.0000,404024.0000,411518.0000,404134.0000,403463.0000,404936.0000,404194.0000,402421.0000,404715.0000,404725.0000,402971.0000,403783.0000,431997.0000,461964.0000,460531.0000,459990.0000,460952.0000,461523.0000,459870.0000,461233.0000,459819.0000,466081.0000,461844.0000,460721.0000,460872.0000,464859.0000,459189.0000,460821.0000,458647.0000,464278.0000,462976.0000,461754.0000,460851.0000,460941.0000,460591.0000,461333.0000,459409.0000,461753.0000,467164.0000,460912.0000,461633.0000,461894.0000,463376.0000,464268.0000,465270.0000,460481.0000,464568.0000,468466.0000,461603.0000,460851.0000,463306.0000,462805.0000,460161.0000,460330.0000,459859.0000,466813.0000,461372.0000,461653.0000,459408.0000,460751.0000,458968.0000,461924.0000,460210.0000,460390.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,47316000,498230.4900,497442.9600,500417.7300,6140.5911,2743.2285,13029.0733,"496148.0000,496910.0000,510906.0000,498903.0000,496789.0000,494755.0000,495848.0000,495046.0000,496129.0000,494185.0000,500537.0000,496860.0000,496760.0000,494305.0000,496409.0000,495437.0000,493684.0000,494926.0000,501989.0000,496229.0000,496138.0000,495667.0000,496379.0000,494546.0000,495607.0000,494836.0000,495848.0000,503643.0000,495727.0000,496329.0000,497992.0000,499014.0000,498463.0000,496429.0000,496950.0000,503432.0000,498723.0000,499013.0000,497040.0000,495547.0000,497752.0000,499014.0000,497972.0000,503913.0000,498062.0000,497371.0000,496559.0000,497161.0000,497160.0000,498042.0000,494917.0000,514393.0000,497231.0000,496629.0000,498323.0000,498774.0000,498493.0000,498343.0000,497090.0000,501879.0000,498062.0000,498503.0000,496339.0000,496930.0000,498203.0000,495698.0000,497621.0000,502681.0000,495527.0000,495467.0000,496960.0000,496650.0000,498002.0000,499104.0000,498002.0000,502911.0000,498543.0000,497651.0000,498764.0000,498423.0000,497761.0000,497932.0000,500798.0000,503973.0000,496049.0000,496479.0000,495948.0000,495246.0000,495687.0000,498012.0000,496138.0000,551122.0000,498143.0000,494154.0000,494826.0000,496649.0000,495848.0000,495989.0000,494094.0000,499004.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,227462500,2233613.7000,2205765.5900,2254941.6700,124485.5957,101393.3498,145584.7436,"2301309.0000,2294627.0000,2393525.0000,2316108.0000,2292013.0000,2294608.0000,2293886.0000,2291852.0000,2288706.0000,2285270.0000,2349792.0000,2288757.0000,2291952.0000,2296431.0000,2288586.0000,2295088.0000,2290951.0000,2294858.0000,2286442.0000,2292904.0000,2308874.0000,2288056.0000,2286762.0000,2282804.0000,2288486.0000,2274960.0000,2286202.0000,2288236.0000,2281632.0000,2286442.0000,2278958.0000,2290960.0000,2285740.0000,2292283.0000,2290078.0000,2276984.0000,2287464.0000,2281543.0000,2286372.0000,2286843.0000,2293385.0000,2288375.0000,2283757.0000,2292503.0000,2283276.0000,2288055.0000,2278988.0000,2140335.0000,1942199.0000,1943983.0000,1942350.0000,1950545.0000,1942661.0000,2242989.0000,2261174.0000,2274710.0000,2270121.0000,2270211.0000,2274969.0000,2274920.0000,2275130.0000,2135687.0000,1954873.0000,1935096.0000,2221990.0000,2289367.0000,2288497.0000,2308383.0000,2270171.0000,2275260.0000,2282034.0000,2291271.0000,2291070.0000,2276945.0000,2283777.0000,2279428.0000,2283987.0000,2283977.0000,2286983.0000,2287184.0000,2280861.0000,2292092.0000,1952829.0000,1967968.0000,1945986.0000,1949232.0000,1939844.0000,1958300.0000,1935908.0000,1946798.0000,2138311.0000,2303674.0000,2293165.0000,2303183.0000,2289477.0000,2300820.0000,2372275.0000,2294857.0000,2300578.0000,2297252.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,80942500,789390.8500,776880.2700,801290.0000,62343.4509,59792.3968,64867.5437,"839128.0000,838187.0000,895595.0000,731444.0000,719412.0000,716546.0000,727908.0000,713822.0000,717738.0000,714412.0000,716186.0000,724882.0000,714452.0000,713210.0000,714022.0000,712749.0000,716386.0000,726365.0000,715033.0000,715684.0000,713771.0000,714823.0000,715454.0000,723669.0000,714382.0000,713781.0000,714212.0000,713992.0000,719422.0000,713981.0000,711567.0000,714983.0000,713951.0000,716797.0000,721646.0000,711577.0000,709613.0000,716396.0000,714372.0000,722007.0000,718249.0000,715956.0000,714362.0000,714813.0000,715955.0000,805154.0000,846041.0000,840631.0000,838989.0000,838828.0000,844279.0000,838066.0000,836564.0000,837626.0000,842425.0000,842054.0000,841513.0000,838747.0000,840340.0000,846232.0000,841553.0000,838468.0000,839680.0000,841543.0000,852584.0000,841724.0000,840581.0000,840811.0000,844369.0000,845721.0000,839960.0000,842956.0000,842445.0000,844960.0000,840421.0000,840041.0000,838017.0000,837896.0000,847645.0000,842385.0000,842485.0000,842896.0000,840511.0000,849007.0000,841172.0000,841382.0000,838246.0000,838707.0000,844478.0000,841934.0000,840651.0000,839439.0000,848496.0000,843116.0000,841222.0000,844909.0000,840301.0000,846964.0000,837175.0000,841853.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,soup 
topology,100,1,143901600,1410411.0300,1388597.7000,1450727.2300,147296.1318,92364.8846,237748.4982,"1422645.0000,1424438.0000,1942851.0000,1917773.0000,2036338.0000,2349121.0000,1616222.0000,1400022.0000,1383931.0000,1391305.0000,1375225.0000,1378992.0000,1424578.0000,1400021.0000,1390704.0000,1398929.0000,1381356.0000,1394562.0000,1398078.0000,1450918.0000,1423586.0000,1420500.0000,1397757.0000,1393329.0000,1386507.0000,1451038.0000,1396465.0000,1396315.0000,1423086.0000,1396445.0000,1391495.0000,1424388.0000,1447892.0000,1394451.0000,1388981.0000,1290794.0000,1311744.0000,1328296.0000,1336291.0000,1270396.0000,1304540.0000,1382860.0000,1298289.0000,1272741.0000,1291286.0000,1278241.0000,1311114.0000,1283440.0000,1409770.0000,1396856.0000,1397186.0000,1355387.0000,1426422.0000,1396765.0000,1353774.0000,1395985.0000,1396846.0000,1386997.0000,1401244.0000,1393900.0000,1423376.0000,1420130.0000,1408648.0000,1398760.0000,1386296.0000,1390945.0000,1395543.0000,1398298.0000,1396686.0000,1389251.0000,1429698.0000,1410852.0000,1399450.0000,1394040.0000,1402647.0000,1318407.0000,1288711.0000,1280876.0000,1331322.0000,1308719.0000,1271919.0000,1281056.0000,1269184.0000,1352191.0000,1391525.0000,1445518.0000,1465686.0000,1394421.0000,1409520.0000,1392287.0000,1422113.0000,1488059.0000,1386116.0000,1392368.0000,1420180.0000,1369524.0000,1424678.0000,1393790.0000,1391846.0000,1425059.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,chain 
topology,100,1,54331400,584265.3100,567490.6200,607720.0800,100372.5052,77269.6182,123605.4333,"551774.0000,550321.0000,748537.0000,870208.0000,877481.0000,862553.0000,872652.0000,869687.0000,843867.0000,870508.0000,867142.0000,872592.0000,860559.0000,697981.0000,614503.0000,581230.0000,560691.0000,558617.0000,545622.0000,559388.0000,559960.0000,583344.0000,546053.0000,558587.0000,561753.0000,558106.0000,559078.0000,557044.0000,573485.0000,561211.0000,511317.0000,517509.0000,510075.0000,503262.0000,510335.0000,480418.0000,495177.0000,519984.0000,493483.0000,499725.0000,500767.0000,487943.0000,507369.0000,509483.0000,502691.0000,506038.0000,528199.0000,529772.0000,564498.0000,567013.0000,534121.0000,560440.0000,569257.0000,559468.0000,560580.0000,557214.0000,560571.0000,533429.0000,542577.0000,559058.0000,561412.0000,584606.0000,544641.0000,531686.0000,531956.0000,561292.0000,555341.0000,579948.0000,550081.0000,580178.0000,551774.0000,550462.0000,552706.0000,580198.0000,580017.0000,550922.0000,553357.0000,549259.0000,583244.0000,580709.0000,580999.0000,550431.0000,553808.0000,550361.0000,553908.0000,549290.0000,549449.0000,581099.0000,580779.0000,554900.0000,550541.0000,549620.0000,552736.0000,551844.0000,551965.0000,549750.0000,580328.0000,551794.0000,580929.0000,551904.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,70215500,731552.2900,714898.1600,755148.9500,99750.5928,74535.9430,126022.5363,"726125.0000,698652.0000,1021624.0000,1019851.0000,1041181.0000,1003961.0000,1012778.0000,1016605.0000,1065528.0000,1039128.0000,1074484.0000,720984.0000,727196.0000,726706.0000,725042.0000,724571.0000,720574.0000,729231.0000,722337.0000,731014.0000,723149.0000,724792.0000,723359.0000,764146.0000,720855.0000,730433.0000,720824.0000,723248.0000,728138.0000,674096.0000,667504.0000,642426.0000,669046.0000,662574.0000,646724.0000,646053.0000,659408.0000,654839.0000,667092.0000,665079.0000,637436.0000,721636.0000,719272.0000,697620.0000,697219.0000,698052.0000,726174.0000,721425.0000,699725.0000,725223.0000,699434.0000,722749.0000,704423.0000,720424.0000,726024.0000,724631.0000,725574.0000,725373.0000,696258.0000,727257.0000,724521.0000,728639.0000,721786.0000,701999.0000,722147.0000,728218.0000,723259.0000,726876.0000,726285.0000,724141.0000,728729.0000,720413.0000,694765.0000,695987.0000,704914.0000,722918.0000,723509.0000,657835.0000,662724.0000,663145.0000,711948.0000,637516.0000,678394.0000,654298.0000,661802.0000,641765.0000,662754.0000,665480.0000,674206.0000,638348.0000,628109.0000,653837.0000,785437.0000,711947.0000,698212.0000,726245.0000,699404.0000,720063.0000,725764.0000,699604.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,73263700,750304.5500,731134.2000,778112.8900,116610.0926,86272.0872,148345.7386,"720374.0000,727898.0000,1106956.0000,1172240.0000,1098891.0000,1073222.0000,1109350.0000,1122145.0000,1078352.0000,1099061.0000,1110283.0000,743648.0000,722568.0000,726525.0000,727578.0000,748607.0000,730854.0000,722848.0000,726335.0000,720163.0000,727698.0000,730342.0000,747745.0000,758085.0000,725624.0000,726485.0000,727337.0000,720664.0000,729360.0000,725504.0000,723139.0000,752605.0000,727818.0000,755690.0000,713911.0000,720494.0000,715514.0000,728429.0000,709964.0000,727878.0000,725012.0000,725853.0000,725753.0000,791468.0000,723299.0000,725423.0000,727206.0000,722387.0000,727797.0000,724011.0000,729401.0000,722438.0000,724952.0000,729761.0000,721426.0000,732316.0000,719322.0000,724601.0000,728408.0000,724922.0000,725353.0000,723980.0000,727126.0000,728870.0000,722117.0000,726134.0000,676610.0000,643929.0000,662794.0000,645442.0000,646012.0000,679887.0000,647967.0000,660429.0000,645621.0000,654770.0000,644499.0000,659348.0000,660901.0000,653717.0000,643288.0000,640822.0000,656762.0000,732416.0000,731575.0000,722568.0000,727758.0000,724181.0000,725444.0000,743507.0000,727878.0000,722498.0000,726465.0000,726656.0000,756532.0000,720694.0000,728730.0000,726355.0000,729551.0000,723329.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,213524600,2200987.6000,2155371.7200,2286876.5000,308575.4875,199128.2127,472029.6228,"2262326.0000,2264681.0000,3803758.0000,3800351.0000,3854114.0000,2263559.0000,2155764.0000,2144873.0000,2096813.0000,1939424.0000,1955504.0000,1946136.0000,1973760.0000,1962397.0000,1942670.0000,1942550.0000,2071535.0000,2231829.0000,2264520.0000,2263227.0000,2269720.0000,2239553.0000,2229354.0000,2297213.0000,2268407.0000,2287524.0000,2251776.0000,2263198.0000,2254011.0000,2228301.0000,2265682.0000,2268177.0000,2312160.0000,2057729.0000,1921931.0000,1962248.0000,1971054.0000,1942029.0000,2057808.0000,2265192.0000,2261214.0000,2267427.0000,2163378.0000,2169921.0000,2171524.0000,2158099.0000,2156957.0000,2158419.0000,2133532.0000,2173258.0000,2121048.0000,2145014.0000,2132039.0000,2145796.0000,2172346.0000,2133371.0000,2200408.0000,2236889.0000,2239202.0000,1924706.0000,1975533.0000,1944564.0000,1952790.0000,1967527.0000,2301521.0000,2260854.0000,2295369.0000,2166444.0000,2167406.0000,2151095.0000,2146086.0000,2175662.0000,2154071.0000,2158610.0000,2138111.0000,2157868.0000,2156726.0000,2145114.0000,2142489.0000,2164350.0000,2160072.0000,2149042.0000,2139273.0000,2228282.0000,2017663.0000,1940065.0000,1931178.0000,1926409.0000,1942460.0000,2128964.0000,2266333.0000,2255954.0000,2353749.0000,2266544.0000,2241156.0000,2249561.0000,2324083.0000,2254782.0000,2259180.0000,2324414.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,jacobi 
topology,100,1,94920300,986959.9300,964752.8500,1015252.6600,127677.3808,102589.8748,158266.6083,"883763.0000,887411.0000,1396826.0000,1363222.0000,1366989.0000,1364034.0000,1393980.0000,1389823.0000,1246381.0000,1017667.0000,1016084.0000,1043335.0000,1048125.0000,984695.0000,1016074.0000,1014601.0000,1021895.0000,1011605.0000,1045630.0000,1015253.0000,1018879.0000,1012918.0000,1045730.0000,1070988.0000,1045480.0000,1045189.0000,1045820.0000,1014271.0000,1019430.0000,1047314.0000,917557.0000,860950.0000,900756.0000,870308.0000,882671.0000,848757.0000,869536.0000,871109.0000,859106.0000,845701.0000,848576.0000,885066.0000,846362.0000,859797.0000,849277.0000,845140.0000,889153.0000,883774.0000,850851.0000,879956.0000,850841.0000,993601.0000,986688.0000,986347.0000,987179.0000,1045550.0000,986748.0000,988461.0000,986949.0000,1102668.0000,987029.0000,1012457.0000,1015614.0000,1042023.0000,1014040.0000,1021073.0000,984895.0000,1018819.0000,1015583.0000,1015452.0000,1016073.0000,1015332.0000,1015162.0000,1017577.0000,1012858.0000,989523.0000,1014601.0000,1018108.0000,1016645.0000,1014361.0000,1047383.0000,987760.0000,1018027.0000,1013959.0000,1038847.0000,1051651.0000,957813.0000,848026.0000,856502.0000,859437.0000,854989.0000,851442.0000,849838.0000,865279.0000,852373.0000,853215.0000,895696.0000,858676.0000,855179.0000,849829.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,297087700,3006296.0400,2978310.6300,3027608.7800,124500.3538,100564.6212,146297.4650,"2720334.0000,2707139.0000,3076319.0000,3061752.0000,3060268.0000,3069015.0000,3066039.0000,3066020.0000,3071280.0000,3057383.0000,3063214.0000,3062092.0000,3066691.0000,3060599.0000,3073543.0000,3068053.0000,3074095.0000,3058535.0000,3060909.0000,3064416.0000,3061311.0000,3065409.0000,3060929.0000,3062081.0000,3062093.0000,3062864.0000,3057263.0000,3067101.0000,3058596.0000,3061972.0000,3055349.0000,3074967.0000,2937696.0000,2716147.0000,2733269.0000,2949479.0000,3067261.0000,3063594.0000,3066440.0000,3067583.0000,3069526.0000,3057874.0000,3049699.0000,3065950.0000,3061871.0000,3065268.0000,3055059.0000,3065498.0000,3063113.0000,3059286.0000,3063775.0000,3057473.0000,3064397.0000,3065248.0000,3063384.0000,3059507.0000,3063485.0000,3068124.0000,3062422.0000,3061041.0000,3069456.0000,3057714.0000,3059076.0000,3058616.0000,3067723.0000,3066671.0000,3067011.0000,3060639.0000,3064957.0000,3063013.0000,3065558.0000,3048055.0000,2714744.0000,2712780.0000,2715686.0000,2708171.0000,2711758.0000,2722789.0000,2709634.0000,2709474.0000,2714764.0000,2707059.0000,2997490.0000,3058345.0000,3061100.0000,3055719.0000,3061470.0000,3063114.0000,3067743.0000,3060499.0000,3059607.0000,3060750.0000,3058766.0000,3052544.0000,3055860.0000,3059247.0000,3065098.0000,3067432.0000,2929060.0000,2719312.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,67019300,680884.6000,680472.3900,681443.2200,2425.3001,1929.0835,3210.3546,"679807.0000,678694.0000,691979.0000,680919.0000,681199.0000,681590.0000,680838.0000,679056.0000,680397.0000,680668.0000,680398.0000,684726.0000,679746.0000,679236.0000,680498.0000,679626.0000,679195.0000,685166.0000,678003.0000,679756.0000,680017.0000,678865.0000,687892.0000,681710.0000,679777.0000,680729.0000,680007.0000,679396.0000,687601.0000,680699.0000,679245.0000,679146.0000,680037.0000,680808.0000,681931.0000,679466.0000,680949.0000,680247.0000,680628.0000,681710.0000,685667.0000,680938.0000,681801.0000,679726.0000,679336.0000,680788.0000,687451.0000,680057.0000,679386.0000,680288.0000,682131.0000,681750.0000,685607.0000,679055.0000,680007.0000,680258.0000,680147.0000,680358.0000,686409.0000,678875.0000,680187.0000,680338.0000,679807.0000,681389.0000,684446.0000,679296.0000,679406.0000,680107.0000,680799.0000,686760.0000,680187.0000,679186.0000,678273.0000,678363.0000,679426.0000,682372.0000,681079.0000,679546.0000,679726.0000,679956.0000,678815.0000,685397.0000,680798.0000,678655.0000,680518.0000,679316.0000,679235.0000,680227.0000,679276.0000,679375.0000,678594.0000,679827.0000,679856.0000,684666.0000,679136.0000,679896.0000,679246.0000,680177.0000,680388.0000,682021.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,75143900,755875.1600,749874.1500,763748.6600,34583.7983,26039.1919,56648.9689,"705785.0000,706366.0000,790446.0000,769156.0000,768084.0000,769577.0000,768635.0000,972852.0000,770088.0000,767803.0000,766350.0000,769597.0000,775037.0000,770910.0000,767072.0000,766931.0000,765759.0000,769186.0000,767614.0000,768975.0000,767623.0000,766210.0000,764658.0000,771410.0000,765640.0000,768735.0000,772613.0000,768655.0000,774576.0000,770478.0000,772221.0000,770589.0000,770519.0000,774145.0000,766882.0000,769287.0000,766601.0000,764878.0000,768335.0000,770047.0000,770869.0000,770669.0000,767492.0000,769997.0000,770328.0000,768715.0000,769406.0000,770058.0000,770548.0000,732376.0000,708150.0000,706588.0000,708060.0000,710004.0000,713390.0000,710455.0000,710034.0000,707900.0000,707780.0000,709172.0000,715835.0000,708872.0000,710134.0000,708110.0000,709132.0000,720123.0000,773504.0000,770839.0000,770208.0000,767283.0000,768715.0000,777061.0000,768315.0000,768635.0000,767222.0000,768845.0000,776891.0000,769376.0000,769166.0000,767263.0000,770178.0000,768324.0000,768164.0000,767914.0000,767493.0000,768935.0000,775357.0000,768265.0000,768875.0000,769797.0000,768174.0000,789364.0000,713230.0000,708722.0000,706948.0000,708371.0000,705896.0000,712799.0000,707279.0000,705586.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,80021200,783766.6300,777296.8300,789279.8000,30291.5226,26710.9510,32991.3427,"799815.0000,798812.0000,815313.0000,807518.0000,802409.0000,802069.0000,749999.0000,732366.0000,731234.0000,733208.0000,732647.0000,737165.0000,732537.0000,731675.0000,731906.0000,732326.0000,732767.0000,753226.0000,731765.0000,731776.0000,730653.0000,730262.0000,740191.0000,733048.0000,730162.0000,730903.0000,730022.0000,747014.0000,734972.0000,732897.0000,732847.0000,732717.0000,733458.0000,779636.0000,802910.0000,800676.0000,803251.0000,804152.0000,809683.0000,802068.0000,800396.0000,799924.0000,800886.0000,810655.0000,801978.0000,801437.0000,802059.0000,799784.0000,807639.0000,801738.0000,800055.0000,802439.0000,801087.0000,806086.0000,803922.0000,801988.0000,801618.0000,802319.0000,803922.0000,799563.0000,799674.0000,802960.0000,798712.0000,808391.0000,803201.0000,799744.0000,800976.0000,799594.0000,805175.0000,798692.0000,799223.0000,803250.0000,798312.0000,811987.0000,803401.0000,800335.0000,799734.0000,800495.0000,806977.0000,802790.0000,802460.0000,801046.0000,802249.0000,807820.0000,799613.0000,799513.0000,799113.0000,801157.0000,807148.0000,800565.0000,800846.0000,800436.0000,798241.0000,801658.0000,797830.0000,801066.0000,797950.0000,801598.0000,801698.0000,799483.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,425407100,4266970.3100,4236210.0600,4292266.4200,141450.8718,120550.3486,158663.1733,"4342840.0000,4346286.0000,4387133.0000,4353450.0000,4340926.0000,4344743.0000,4336017.0000,4338090.0000,4335135.0000,4341447.0000,4345515.0000,4340175.0000,4346086.0000,4337890.0000,4352537.0000,4338171.0000,4334214.0000,4341026.0000,4342549.0000,4339844.0000,4342499.0000,4337990.0000,4348631.0000,4344202.0000,4345274.0000,4346126.0000,4340686.0000,4346477.0000,4343511.0000,4344072.0000,4336147.0000,4337108.0000,4342219.0000,4167517.0000,4000520.0000,3988708.0000,3996763.0000,4331959.0000,4354643.0000,4338902.0000,4338270.0000,4337279.0000,4338582.0000,4330325.0000,4336427.0000,4268419.0000,3997796.0000,4005890.0000,4235015.0000,4339654.0000,4059853.0000,3997566.0000,3989440.0000,3991964.0000,3994399.0000,4007344.0000,3981564.0000,3987126.0000,4279770.0000,4349492.0000,4351435.0000,4349903.0000,4339443.0000,4358860.0000,4343882.0000,4337299.0000,4340656.0000,4387364.0000,4346357.0000,4342358.0000,4346547.0000,4357338.0000,4344282.0000,4348560.0000,4338953.0000,4344252.0000,4341998.0000,4348671.0000,4337700.0000,4337159.0000,4342540.0000,4343851.0000,4388877.0000,4338391.0000,4312393.0000,3994920.0000,4002755.0000,4001102.0000,3976145.0000,3978258.0000,3982186.0000,3976846.0000,4030988.0000,4347719.0000,4341358.0000,4339132.0000,4341737.0000,4342569.0000,4343561.0000,4344483.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,132381000,1305467.7200,1293726.4600,1315849.2600,56317.3254,50781.4602,60211.9827,"1341581.0000,1344797.0000,1221373.0000,1298369.0000,1340639.0000,1348825.0000,1342853.0000,1343635.0000,1336562.0000,1347252.0000,1336992.0000,1337554.0000,1344587.0000,1337583.0000,1335379.0000,1344687.0000,1339117.0000,1333676.0000,1340158.0000,1336963.0000,1342943.0000,1349345.0000,1339968.0000,1342112.0000,1342423.0000,1342663.0000,1343414.0000,1342233.0000,1342763.0000,1344297.0000,1352712.0000,1345859.0000,1344476.0000,1343444.0000,1215422.0000,1214941.0000,1218598.0000,1215862.0000,1214790.0000,1222055.0000,1228106.0000,1219560.0000,1220872.0000,1234498.0000,1220111.0000,1216223.0000,1221312.0000,1216724.0000,1214650.0000,1234358.0000,1216805.0000,1217105.0000,1238787.0000,1217596.0000,1216734.0000,1220562.0000,1222325.0000,1356379.0000,1344236.0000,1352331.0000,1345709.0000,1344957.0000,1347583.0000,1342603.0000,1348034.0000,1350257.0000,1341170.0000,1336802.0000,1346621.0000,1336752.0000,1333986.0000,1345810.0000,1336090.0000,1336070.0000,1232474.0000,1217666.0000,1215782.0000,1217396.0000,1217425.0000,1217034.0000,1281628.0000,1345408.0000,1344126.0000,1348694.0000,1346640.0000,1343856.0000,1346871.0000,1342002.0000,1341992.0000,1344837.0000,1343344.0000,1343815.0000,1347382.0000,1342753.0000,1343144.0000,1349987.0000,1343074.0000,1344817.0000,1347252.0000,1342753.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,234807700,2348000.4400,2327497.1400,2367219.9400,101285.3060,84642.7546,119441.0275,"2231338.0000,2122391.0000,2395429.0000,2380510.0000,2343791.0000,2381623.0000,2350864.0000,2358609.0000,2345504.0000,2364630.0000,2521919.0000,2532158.0000,2528460.0000,2531626.0000,2527087.0000,2565631.0000,2517460.0000,2522590.0000,2500538.0000,2559650.0000,2535113.0000,2523602.0000,2402382.0000,2374619.0000,2349812.0000,2357387.0000,2292453.0000,2374760.0000,2293836.0000,2353709.0000,2383897.0000,2345834.0000,2352037.0000,2352257.0000,2357336.0000,2306640.0000,2351154.0000,2321919.0000,2348830.0000,2350794.0000,2360914.0000,2343129.0000,2378436.0000,2384407.0000,2325596.0000,2346626.0000,2353138.0000,2361254.0000,2343880.0000,2351234.0000,2372425.0000,2362346.0000,2366624.0000,2387764.0000,2416508.0000,2357226.0000,2347167.0000,2356475.0000,2322179.0000,2345514.0000,2389678.0000,2348038.0000,2324023.0000,2321759.0000,2394217.0000,2349631.0000,2375139.0000,2357145.0000,2380370.0000,2353119.0000,2350022.0000,2358458.0000,2343680.0000,2357245.0000,2353719.0000,2344683.0000,2295940.0000,2193596.0000,2103886.0000,2126519.0000,2130045.0000,2110488.0000,2125527.0000,2117492.0000,2150364.0000,2089278.0000,2139383.0000,2193035.0000,2340004.0000,2343190.0000,2338781.0000,2323742.0000,2377103.0000,2350293.0000,2391070.0000,2344892.0000,2376051.0000,2327640.0000,2347718.0000,2370060.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,55098600,580949.2100,565753.1000,601377.2600,89268.0380,69843.8624,109700.1333,"556493.0000,534231.0000,832135.0000,808080.0000,806717.0000,804203.0000,802068.0000,795476.0000,827457.0000,819491.0000,811617.0000,808360.0000,890847.0000,728178.0000,506609.0000,491149.0000,509443.0000,516156.0000,511247.0000,506819.0000,552415.0000,558316.0000,560670.0000,556472.0000,559027.0000,559438.0000,556684.0000,606868.0000,553948.0000,534952.0000,553517.0000,533028.0000,557315.0000,556333.0000,556903.0000,534340.0000,558156.0000,552505.0000,553848.0000,560891.0000,529191.0000,584927.0000,530234.0000,529832.0000,531977.0000,556453.0000,556733.0000,534131.0000,553267.0000,558817.0000,555280.0000,555471.0000,558065.0000,529581.0000,535182.0000,553097.0000,559709.0000,583114.0000,586770.0000,558868.0000,557455.0000,559759.0000,558336.0000,576440.0000,581631.0000,583414.0000,577172.0000,583644.0000,503052.0000,514133.0000,506608.0000,494105.0000,506157.0000,513872.0000,498443.0000,503563.0000,517258.0000,506108.0000,563486.0000,554980.0000,535543.0000,587472.0000,582602.0000,586770.0000,558096.0000,560931.0000,555621.0000,584827.0000,586460.0000,584506.0000,559920.0000,558056.0000,559539.0000,558456.0000,558577.0000,533019.0000,584606.0000,586169.0000,532307.0000,558727.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,73334300,699407.3600,675062.5500,730208.9800,138578.7256,113632.4844,165928.7969,"643317.0000,679406.0000,1066429.0000,1075507.0000,1072952.0000,1103790.0000,1075196.0000,1107748.0000,1036723.0000,928327.0000,925372.0000,934009.0000,927376.0000,893942.0000,937755.0000,896547.0000,927937.0000,925262.0000,810184.0000,726565.0000,724852.0000,757684.0000,719963.0000,724461.0000,731354.0000,721706.0000,701067.0000,752795.0000,684235.0000,643979.0000,676520.0000,671511.0000,670599.0000,657444.0000,657003.0000,676020.0000,664477.0000,665941.0000,610926.0000,571191.0000,564468.0000,564047.0000,563186.0000,553757.0000,568266.0000,608681.0000,605926.0000,611898.0000,614893.0000,601378.0000,634190.0000,611578.0000,607720.0000,606598.0000,630654.0000,609734.0000,609573.0000,604764.0000,611167.0000,603752.0000,620224.0000,605005.0000,607089.0000,625444.0000,661883.0000,608812.0000,605406.0000,608972.0000,640152.0000,601829.0000,610545.0000,628460.0000,641925.0000,601368.0000,607309.0000,629020.0000,606508.0000,652665.0000,623149.0000,615755.0000,608482.0000,696087.0000,748417.0000,760419.0000,668886.0000,612478.0000,613621.0000,613872.0000,610024.0000,613081.0000,650481.0000,653036.0000,679446.0000,671271.0000,669517.0000,660420.0000,668305.0000,670419.0000,669066.0000,637586.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,69794300,764423.4700,754143.8000,778812.8700,61133.0776,46390.8757,75963.6424,"753216.0000,741914.0000,949738.0000,934840.0000,927597.0000,933868.0000,952153.0000,925142.0000,959967.0000,931644.0000,952273.0000,929851.0000,721556.0000,725473.0000,755229.0000,729230.0000,752294.0000,781209.0000,727137.0000,726946.0000,752594.0000,723709.0000,760108.0000,754999.0000,751713.0000,727828.0000,753837.0000,755209.0000,756762.0000,758717.0000,717327.0000,751983.0000,731435.0000,756843.0000,752194.0000,724862.0000,758626.0000,751062.0000,786118.0000,783123.0000,755750.0000,720924.0000,725944.0000,757804.0000,751663.0000,731655.0000,718831.0000,756602.0000,727998.0000,752494.0000,783914.0000,751903.0000,760560.0000,751222.0000,730343.0000,751372.0000,721736.0000,769877.0000,732406.0000,751211.0000,732105.0000,724150.0000,754509.0000,755931.0000,726936.0000,748857.0000,725974.0000,757955.0000,757664.0000,719442.0000,755160.0000,724101.0000,756242.0000,755600.0000,727197.0000,755971.0000,752925.0000,756161.0000,723039.0000,755660.0000,752915.0000,697890.0000,756532.0000,752835.0000,786319.0000,750992.0000,724191.0000,701227.0000,754398.0000,723289.0000,752565.0000,758556.0000,783783.0000,741062.0000,747004.0000,716135.0000,720364.0000,745191.0000,740181.0000,746804.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,310490200,3090336.6800,3048212.1500,3147213.5600,249339.4125,192390.1923,327391.3950,"3082832.0000,3086168.0000,3951117.0000,4105259.0000,2926635.0000,2935422.0000,2843307.0000,2902870.0000,2905566.0000,2917077.0000,2802279.0000,2887231.0000,2874336.0000,2873324.0000,2855761.0000,3043868.0000,3023449.0000,3079185.0000,3075367.0000,3045922.0000,3080547.0000,3050380.0000,3083874.0000,3059317.0000,3090175.0000,3055058.0000,3094584.0000,3084614.0000,3084294.0000,3067462.0000,3064196.0000,3069707.0000,3087570.0000,3081709.0000,2884175.0000,2861893.0000,2872943.0000,2824782.0000,2848387.0000,2872052.0000,2842606.0000,2867544.0000,2841193.0000,3303188.0000,4009637.0000,3990152.0000,2762885.0000,2900085.0000,2892631.0000,2839651.0000,2886800.0000,2884856.0000,2866451.0000,2831425.0000,2947455.0000,3054478.0000,3075577.0000,3401355.0000,3460707.0000,3365106.0000,3106085.0000,3031233.0000,3038316.0000,3113890.0000,3103972.0000,3104653.0000,3094033.0000,3053285.0000,3111476.0000,3075258.0000,3127756.0000,3080526.0000,3112067.0000,3041163.0000,3340780.0000,3381387.0000,3430540.0000,3013330.0000,2867494.0000,2839690.0000,3036133.0000,3004112.0000,3065659.0000,3053816.0000,3082140.0000,3101116.0000,3082410.0000,3038748.0000,3131083.0000,3280256.0000,3416153.0000,3396336.0000,3107078.0000,3115142.0000,3187239.0000,3414530.0000,3393902.0000,3438736.0000,3098631.0000,3065038.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,102775800,1024683.7100,1008997.4200,1047692.8900,95665.1039,69778.3838,126045.4293,"1016074.0000,1014671.0000,1374473.0000,1364986.0000,1365607.0000,1365096.0000,1363783.0000,1362100.0000,1221243.0000,1045640.0000,1012778.0000,1017667.0000,1014591.0000,1046752.0000,1043486.0000,1016014.0000,1045149.0000,1047354.0000,1044558.0000,1012988.0000,1016104.0000,1033737.0000,1045920.0000,1073002.0000,1046522.0000,1045400.0000,1043155.0000,1021565.0000,1009722.0000,990936.0000,1019491.0000,989013.0000,996677.0000,1008659.0000,1040450.0000,1036222.0000,1046281.0000,1017656.0000,1013158.0000,1016595.0000,1019380.0000,1015613.0000,1015723.0000,1016765.0000,1017186.0000,1019180.0000,1008981.0000,1047644.0000,1013600.0000,1019260.0000,1013279.0000,1016454.0000,1046863.0000,1042444.0000,1047323.0000,1017807.0000,966239.0000,951351.0000,948586.0000,935982.0000,952493.0000,935611.0000,956121.0000,967552.0000,1009441.0000,967592.0000,959586.0000,971870.0000,979594.0000,988481.0000,983492.0000,945159.0000,1017968.0000,947925.0000,947885.0000,938757.0000,973333.0000,960969.0000,939048.0000,935321.0000,942394.0000,982781.0000,947474.0000,940110.0000,946472.0000,946062.0000,938136.0000,942514.0000,943506.0000,1014080.0000,1014441.0000,1019220.0000,1013599.0000,1017135.0000,1016494.0000,1016925.0000,1016234.0000,1017256.0000,1017687.0000,1012718.0000" +normalizing randomized box sets - 2d,"small, 
native",100,43,2425200,582.5047,581.0528,588.5205,12.9835,1.6910,30.2940,"580.6047,580.5814,606.6744,586.6512,581.7674,580.8140,581.5349,582.9070,579.2093,581.7442,580.1395,581.2791,581.5349,578.9535,582.4419,581.2791,580.3488,580.3721,580.5814,581.0698,580.5814,578.0233,582.4651,579.8837,581.7674,581.2791,580.8372,581.2791,580.6047,580.3488,577.5581,581.3023,579.8837,580.1395,579.4186,581.3023,578.9535,576.8605,582.0000,580.8140,580.5814,581.7674,581.7442,580.5814,578.0233,578.7209,581.2791,581.5116,579.6512,581.5116,580.6047,580.5814,578.4884,581.7442,582.2326,581.5116,582.2326,583.6047,582.0000,579.1860,581.7674,581.5116,582.0000,581.5116,581.5349,582.9302,579.4186,582.9070,582.2093,580.8372,582.9070,581.7674,582.4419,580.1395,578.7209,580.8140,581.5349,581.5116,581.0465,579.8837,581.0698,578.2558,580.1163,579.4186,581.7442,584.0930,583.1628,581.5116,580.3721,581.0465,582.6977,581.9767,579.6744,579.6512,583.6279,577.3256,583.6047,708.2558,581.5349,580.5814" +normalizing randomized box sets - 2d,"small, embedded in 
3d",100,35,2415000,693.5246,692.9840,694.7817,3.9881,2.0746,8.2127,"689.2571,693.8286,726.7429,697.8286,695.8286,694.1143,693.8286,695.8286,689.2571,694.1143,696.6857,693.2857,693.5429,693.8286,688.6857,692.9714,694.1143,690.4000,693.2571,694.4000,684.9714,692.6857,694.1143,689.8286,694.4000,694.6857,688.6857,693.5429,693.8286,692.9714,695.5714,694.1143,688.9714,693.8286,693.8286,694.1143,693.2571,695.8286,690.1143,691.8286,692.9714,693.2571,694.1429,694.9714,691.5429,689.8286,694.1143,691.8286,695.2571,694.1143,691.8286,689.2571,695.5429,695.2857,694.6857,696.1143,692.9714,691.2571,695.8286,693.5429,694.4000,695.8286,692.1143,690.1143,694.9714,692.4000,694.9714,693.5429,694.1143,689.5143,694.1143,691.2571,694.1143,695.2571,694.6857,693.5429,690.9714,692.4000,695.0000,695.8286,693.2571,692.6857,689.8286,693.2571,695.8286,694.4000,692.4000,695.5429,690.9714,694.1143,694.6857,692.9714,691.8286,694.6857,690.1143,692.4000,695.0000,693.5429,689.2286,695.2571" +normalizing randomized box sets - 2d,"medium, 
native",100,4,2444400,5644.7800,5620.1050,5706.9525,176.7395,17.4317,326.2723,"5585.2500,5632.5000,6732.2500,5682.7500,5635.2500,5642.7500,5625.0000,5640.2500,5635.2500,5637.7500,5605.0000,5635.2500,5632.7500,5612.7500,5605.0000,5610.2500,5640.2500,5655.2500,5620.0000,5615.0000,5642.7500,5610.2500,5627.5000,5625.2500,5602.7500,5615.0000,5612.7500,5637.7500,5612.7500,5622.5000,5605.2500,5605.0000,5607.5000,5627.7500,5627.7500,5597.5000,5635.2500,5632.7500,5630.2500,5597.5000,5637.7500,5615.2500,5610.0000,5617.7500,5595.0000,5602.5000,5635.2500,5605.0000,5610.0000,5605.2500,5622.5000,5625.2500,5620.2500,5637.7500,5615.0000,5622.7500,5617.7500,5620.0000,5605.0000,5602.7500,5617.5000,5597.7500,7005.2500,5670.2500,5642.7500,5630.2500,5622.7500,5620.0000,5632.7500,5622.7500,5617.7500,5610.0000,5615.2500,5615.2500,5632.5000,5612.7500,5617.7500,5637.7500,5637.5000,5625.0000,5610.2500,5587.5000,5615.2500,5635.2500,5625.2500,5627.5000,5640.2500,5620.2500,5620.2500,5627.5000,5600.0000,5597.7500,5575.0000,5620.2500,5612.5000,5590.2500,5587.5000,5597.7500,5600.0000,5625.2500" +normalizing randomized box sets - 2d,"medium, embedded in 
3d",100,4,2661200,6879.0225,6858.5175,6960.9725,190.1284,23.1489,450.5929,"6865.0000,6857.5000,8753.7500,6995.2500,6917.5000,6922.5000,6902.7500,6877.5000,6890.0000,6917.7500,6890.0000,6900.0000,6857.5000,6870.0000,6882.5000,6860.0000,6860.0000,6837.5000,6832.5000,6850.0000,6837.5000,6847.5000,6847.5000,6837.5000,6845.0000,6827.5000,6850.0000,6847.5000,6842.5000,6850.0000,6840.0000,6867.5000,6857.5000,6845.0000,6855.0000,6837.5000,6862.5000,6830.0000,6850.0000,6850.0000,6887.5000,6832.5000,6865.0000,6857.5000,6847.5000,6847.5000,6872.7500,6852.5000,6870.0000,6862.5000,6885.0000,6857.5000,6832.2500,6840.0000,6837.5000,6862.5000,6877.5000,6835.0000,6872.5000,6855.0000,6807.5000,6857.5000,6847.5000,6850.0000,6872.5000,6885.0000,6819.7500,6830.0000,6857.5000,6862.5000,6849.7500,6872.5000,6872.5000,6870.0000,6905.2500,6862.5000,6875.0000,6880.0000,6862.5000,6835.0000,6872.7500,6870.0000,6872.5000,6847.5000,6842.5000,6852.5000,6877.5000,6842.5000,6847.5000,6845.0000,6870.2500,6855.0000,6890.0000,6867.5000,6822.5000,6827.5000,6850.0000,6825.0000,6857.5000,6875.0000" +normalizing randomized box sets - 2d,"large, 
native",100,1,19583800,198399.1000,197919.3700,200206.5100,4248.3363,952.1847,9888.8029,"198264.0000,194236.0000,202011.0000,197482.0000,196861.0000,197503.0000,197362.0000,196350.0000,197683.0000,197442.0000,196831.0000,196600.0000,199046.0000,198294.0000,198564.0000,197843.0000,197753.0000,198174.0000,198294.0000,198103.0000,202122.0000,198514.0000,198524.0000,198224.0000,198074.0000,198213.0000,198584.0000,197923.0000,197813.0000,198304.0000,198655.0000,198524.0000,197713.0000,198314.0000,198354.0000,198575.0000,198775.0000,198414.0000,198073.0000,198304.0000,198455.0000,201850.0000,197563.0000,198244.0000,198354.0000,197522.0000,197753.0000,197353.0000,197983.0000,198043.0000,198223.0000,197683.0000,198705.0000,198114.0000,198674.0000,198044.0000,198905.0000,198154.0000,198364.0000,198214.0000,198534.0000,239382.0000,197763.0000,197823.0000,196931.0000,197252.0000,197312.0000,197002.0000,196891.0000,197783.0000,197923.0000,197322.0000,196971.0000,197272.0000,196721.0000,197573.0000,197482.0000,197492.0000,196510.0000,197272.0000,196761.0000,201190.0000,197211.0000,197382.0000,196911.0000,198835.0000,197933.0000,198104.0000,198354.0000,197793.0000,197753.0000,197763.0000,197933.0000,197202.0000,197272.0000,197833.0000,198464.0000,198825.0000,198114.0000,198174.0000" +normalizing randomized box sets - 2d,"large, embedded in 
3d",100,1,20977800,213858.2200,213674.5700,214143.5500,1146.7581,811.1828,1536.8163,"213303.0000,213272.0000,218633.0000,214735.0000,218142.0000,214124.0000,213913.0000,213363.0000,213803.0000,213773.0000,214094.0000,214194.0000,213663.0000,213163.0000,213823.0000,213393.0000,213362.0000,213573.0000,213473.0000,213482.0000,213603.0000,213924.0000,213563.0000,217170.0000,213793.0000,213523.0000,213563.0000,213052.0000,213292.0000,213242.0000,213794.0000,213803.0000,213192.0000,214034.0000,213793.0000,213723.0000,213562.0000,213393.0000,212801.0000,213293.0000,213753.0000,216569.0000,214615.0000,213683.0000,213512.0000,213643.0000,213532.0000,214114.0000,213323.0000,212641.0000,213332.0000,213473.0000,213763.0000,213403.0000,213423.0000,213973.0000,214495.0000,213533.0000,212952.0000,213092.0000,218392.0000,213703.0000,213493.0000,214114.0000,213332.0000,213032.0000,213312.0000,213382.0000,213653.0000,214244.0000,213193.0000,213553.0000,213102.0000,213603.0000,213813.0000,213833.0000,213823.0000,213342.0000,213472.0000,217300.0000,213152.0000,213092.0000,213764.0000,213362.0000,213573.0000,213202.0000,213152.0000,213423.0000,213713.0000,213082.0000,212691.0000,213373.0000,213503.0000,214324.0000,213773.0000,214154.0000,213183.0000,213542.0000,218062.0000,213793.0000" +normalizing randomized box sets - 3d,small - 
native,100,10,2459000,2522.0250,2514.9110,2545.9230,57.5087,7.4908,127.7186,"2520.6000,2515.6000,2735.0000,2524.6000,2531.6000,2524.6000,2517.6000,2533.6000,2524.6000,2522.6000,2515.6000,2516.5000,2512.6000,2516.6000,2518.6000,2508.5000,2516.5000,2530.6000,2512.6000,2509.6000,2513.6000,2507.5000,2507.6000,2520.6000,2520.6000,3046.5000,2518.5000,2524.6000,2520.6000,2512.6000,2513.6000,2512.5000,2524.6000,2509.6000,2528.6000,2522.6000,2515.6000,2512.6000,2512.5000,2509.6000,2516.6000,2519.6000,2519.5000,2516.5000,2518.6000,2504.6000,2512.5000,2517.5000,2523.6000,2504.6000,2505.5000,2506.5000,2506.6000,2504.5000,2519.6000,2513.6000,2516.6000,2515.6000,2530.6000,2517.5000,2504.6000,2494.5000,2509.6000,2511.6000,2513.6000,2518.5000,2514.6000,2515.6000,2505.6000,2504.5000,2503.6000,2514.5000,2515.5000,2514.6000,2508.6000,2516.6000,2506.5000,2513.6000,2510.6000,2513.5000,2518.6000,2513.6000,2512.6000,2514.5000,2513.5000,2514.6000,2509.6000,2508.5000,2515.6000,2509.6000,2520.6000,2510.5000,2520.6000,2506.6000,2506.6000,2512.5000,2507.6000,2515.6000,2501.5000,2506.6000" +normalizing randomized box sets - 3d,medium - 
native,100,3,2766900,9601.4067,9585.2433,9650.3700,130.6406,44.8875,285.2653,"9584.0000,9547.3333,9991.6667,9657.6667,9611.0000,9590.6667,9601.0000,9554.0000,9587.6667,9571.0000,9590.6667,9591.0000,9577.3333,9517.3333,9577.3333,10766.6667,9607.3333,9621.0000,9554.3333,9590.6667,9581.0000,9557.3333,9677.6667,9627.3333,9637.6667,9601.0000,9624.3333,9494.0000,9567.3333,9590.6667,9524.0000,9601.0000,9557.6667,9590.6667,9504.0000,9584.3333,9597.6667,9651.0000,9517.3333,9574.0000,9527.6667,9584.0000,9591.0000,9540.6667,9487.3333,9637.6667,9581.0000,9537.3333,9604.3333,9574.0000,9584.0000,9577.6667,9624.3333,9634.0000,9561.0000,9637.6667,9544.0000,9634.3333,9571.0000,9500.6667,9624.3333,9550.6667,9537.3333,9517.6667,9524.0000,9584.0000,9567.3333,9510.6667,9571.0000,9607.3333,9527.3333,9594.0000,9674.3333,9520.6667,9587.3333,9624.0000,9574.3333,9604.3333,9587.3333,9631.0000,9601.0000,9584.0000,9647.6667,9597.6667,9607.6667,9607.6667,9584.0000,9617.6667,9527.3333,9657.6667,9564.3333,9580.6667,9641.0000,9624.3333,9577.6667,9600.6667,9594.3333,9677.6667,9627.6667,9617.6667" +normalizing randomized box sets - 3d,large - 
native,100,1,217410900,2181704.2900,2171514.7600,2188618.6600,42323.3067,31178.0550,53084.1852,"2192774.0000,2194738.0000,2217811.0000,2184539.0000,2190921.0000,2196251.0000,2193125.0000,2193646.0000,2191291.0000,2210077.0000,2189017.0000,2196923.0000,2194597.0000,2196302.0000,2196982.0000,2192092.0000,2197473.0000,2195559.0000,2198365.0000,2200239.0000,2197203.0000,2184168.0000,2195099.0000,2195970.0000,2191302.0000,2198275.0000,2193185.0000,2201100.0000,2194348.0000,2197734.0000,2192624.0000,2196210.0000,2192855.0000,2199117.0000,2197493.0000,2203144.0000,2198125.0000,2194998.0000,2196191.0000,2192704.0000,2187074.0000,2068760.0000,2057397.0000,2058720.0000,2063409.0000,2051616.0000,2152397.0000,2049062.0000,2053922.0000,2056867.0000,2045595.0000,2058800.0000,2193806.0000,2195479.0000,2193856.0000,2201641.0000,2191572.0000,2214085.0000,2193295.0000,2202101.0000,2193796.0000,2190620.0000,2200830.0000,2190660.0000,2197213.0000,2194307.0000,2197453.0000,2193816.0000,2197343.0000,2192615.0000,2196190.0000,2196401.0000,2194157.0000,2198495.0000,2191912.0000,2195589.0000,2191994.0000,2194377.0000,2194037.0000,2192584.0000,2191091.0000,2198515.0000,2199517.0000,2198896.0000,2201601.0000,2195750.0000,2200298.0000,2197733.0000,2194618.0000,2191923.0000,2197343.0000,2212362.0000,2192774.0000,2198004.0000,2200279.0000,2198034.0000,2195680.0000,2196812.0000,2193066.0000,2197723.0000" +normalizing a fully mergeable tiling of boxes - 1,"small, 
native",100,729,2332800,28.3744,28.3429,28.4284,0.2048,0.1395,0.3800,"28.4883,28.2812,29.8477,28.5021,28.2949,28.4595,28.5158,28.4458,28.4335,28.4321,28.3772,28.1989,28.5144,28.4321,28.2126,28.5007,28.4047,28.4746,28.1989,28.5418,28.1715,28.1578,28.5021,28.2126,28.4458,28.2689,28.2263,28.1578,28.5418,28.2126,28.2263,28.3374,28.4321,28.1715,28.5295,28.2263,28.2126,28.5144,28.1715,28.3512,28.5418,28.5295,28.2263,28.5144,28.4060,28.1715,28.2126,28.5281,28.1852,28.3923,28.3635,28.2401,28.1852,28.5158,28.1852,28.4595,28.2812,28.4609,28.2126,28.5007,28.1989,28.3086,28.5144,28.1852,28.2263,28.4883,28.1715,28.3224,28.5418,28.5021,28.5418,28.5021,28.1715,28.2126,28.4870,28.5281,28.2401,28.5158,28.5007,28.5295,28.2401,28.5144,28.4335,28.2401,28.5281,28.2401,28.3100,28.5418,28.2126,28.2812,28.5418,28.5007,28.1989,28.4870,28.1715,28.1715,28.5432,28.1852,28.1578,28.5144" +normalizing a fully mergeable tiling of boxes - 1,"small, embedded in 3d",100,476,2332400,50.5811,50.5054,50.8450,0.6478,0.1973,1.4736,"50.5546,50.4265,51.2479,50.4685,50.8067,50.8277,50.5756,50.4055,50.7017,50.4916,50.3214,50.5105,50.8277,50.8067,50.6807,50.4286,50.7626,50.2164,50.7647,50.7647,50.8277,50.5315,50.3445,50.4706,50.6786,50.4076,50.4496,50.1744,50.7857,50.4076,50.2794,50.3445,50.5945,50.6387,50.5336,50.7227,50.4265,50.7647,50.1975,50.4055,50.3655,50.7017,50.8067,50.8277,50.6786,50.5756,50.5966,49.9013,50.1954,50.8466,50.7626,50.5336,50.5126,50.4475,50.4496,56.6786,50.6387,50.5966,50.5336,50.1534,50.3004,50.5756,50.5756,50.5336,50.4265,50.4706,50.5336,50.5105,50.3866,50.5336,50.3845,49.8382,50.5336,50.4286,50.5525,50.4916,50.5126,50.5735,50.5756,50.6176,50.6176,50.3004,50.5756,50.5966,50.5105,50.6155,50.3025,50.3004,50.5756,50.5336,50.6155,50.6155,49.8382,50.5966,50.3866,50.4055,50.3445,50.5336,50.6155,50.4916" +normalizing a fully mergeable tiling of boxes - 1,"medium, 
native",100,84,2385600,303.7345,302.9146,304.6367,4.3721,4.0234,4.6795,"310.6905,310.4524,307.1071,299.4762,300.6667,300.5476,299.3571,299.9524,300.0714,299.9405,299.7024,299.5952,299.7143,300.9048,301.7381,301.2619,299.1190,299.1190,299.4643,299.4762,298.8810,299.0000,301.6190,300.7857,299.9524,299.8214,299.2262,299.7143,300.0714,300.4286,299.5952,300.9048,299.9524,300.9048,300.1905,300.5357,299.5833,299.9524,300.5476,300.5476,301.2619,300.4286,299.7143,299.3571,298.9881,298.7619,300.4286,299.8333,299.8333,303.0476,302.6905,299.4762,299.8333,303.6429,304.3571,303.2857,301.2619,303.0476,299.5952,303.0476,304.7262,302.0952,302.2143,302.7024,301.1429,304.8333,309.1429,306.1429,309.0238,309.2500,309.3690,309.7262,310.4405,305.5595,302.4524,304.1190,310.4524,309.6071,311.1667,310.9167,308.9048,311.3929,309.6190,310.6786,309.5000,307.2262,310.2024,302.9286,309.3690,309.9643,309.3810,311.2738,309.1429,309.0119,309.6071,309.6190,309.9643,309.5000,309.0119,309.6071" +normalizing a fully mergeable tiling of boxes - 1,"medium, embedded in 
3d",100,48,2380800,427.9181,427.6906,428.3548,1.5508,0.9896,3.0112,"428.8958,426.8125,439.7500,429.1042,428.2708,429.1042,427.0208,428.6875,426.6042,428.6875,426.8125,426.8125,428.6875,426.6042,428.6875,426.8125,428.8958,426.7917,428.6875,426.5833,428.7083,426.5833,428.6875,426.8125,428.8958,426.8125,428.6875,426.6042,428.6875,426.8125,428.8958,426.8125,428.6875,426.8125,428.6875,426.8125,428.8958,426.8125,428.6875,426.8125,428.6875,426.8125,428.8958,426.8125,428.8958,426.8125,428.6875,426.6042,428.6875,426.8125,428.8958,428.6875,428.4792,426.7917,428.9167,426.7917,428.6875,426.6042,428.6875,426.8125,428.8958,426.8125,428.6875,426.8125,428.6875,426.8125,428.8958,426.8125,428.6875,426.6042,428.6875,426.8125,428.8958,426.8125,428.8958,426.8125,428.6875,426.8125,428.6875,426.8125,428.8958,426.8125,428.6875,426.6042,428.6875,426.8125,428.8958,426.8125,428.6875,426.8125,428.6875,426.7917,428.9167,426.7917,428.6875,426.8125,428.6875,426.8125,428.8958,426.8125" +normalizing a fully mergeable tiling of boxes - 1,"large, 
native",100,4,2786000,7542.9125,7535.8925,7551.7225,39.7765,31.8957,60.9535,"7556.2500,7538.7500,7764.2500,7493.7500,7518.7500,7566.5000,7563.7500,7539.0000,7528.7500,7481.2500,7473.5000,7501.2500,7516.2500,7506.2500,7498.5000,7503.7500,7451.2500,7498.7500,7496.2500,7503.7500,7543.7500,7579.0000,7566.2500,7589.0000,7589.0000,7551.2500,7513.7500,7511.2500,7503.7500,7506.2500,7521.2500,7549.0000,7521.2500,7526.2500,7511.2500,7508.7500,7508.7500,7533.7500,7531.2500,7526.5000,7548.7500,7518.7500,7531.2500,7531.2500,7531.2500,7538.7500,7531.2500,7538.7500,7513.7500,7538.7500,7541.2500,7501.2500,7518.7500,7503.7500,7496.2500,7506.2500,7516.2500,7539.0000,7506.2500,7488.5000,7473.5000,7579.0000,7586.2500,7571.5000,7581.5000,7583.7500,7574.0000,7576.2500,7589.0000,7568.7500,7559.0000,7568.7500,7574.0000,7576.2500,7574.0000,7576.2500,7591.5000,7591.5000,7563.7500,7579.0000,7581.2500,7549.0000,7576.2500,7546.5000,7543.7500,7548.7500,7564.0000,7593.7500,7581.5000,7531.2500,7604.0000,7599.0000,7578.7500,7541.5000,7543.7500,7531.2500,7548.7500,7548.7500,7558.7500,7551.5000" +normalizing a fully mergeable tiling of boxes - 1,"large, embedded in 
3d",100,2,2584000,13103.6550,13084.1700,13167.2300,160.8581,35.7838,355.3317,"13078.5000,13109.0000,13660.0000,13149.0000,13063.5000,13084.0000,13139.0000,13134.0000,13058.5000,13079.0000,13044.0000,13119.0000,13108.5000,13134.0000,13124.0000,13059.0000,13124.0000,13133.5000,13128.5000,13119.0000,13094.0000,13099.0000,13124.0000,13108.5000,13129.0000,13104.0000,13029.0000,13073.5000,13104.0000,13084.0000,13078.5000,13084.0000,13094.0000,13068.5000,13108.5000,13054.0000,13109.0000,13154.0000,13164.0000,13139.0000,13083.5000,13014.0000,13058.5000,13089.0000,13069.0000,13113.5000,13078.5000,13069.0000,13058.5000,13093.5000,13079.0000,13099.0000,13028.5000,13094.0000,13104.0000,13073.5000,13039.0000,13038.5000,13078.5000,13089.0000,13084.0000,13068.5000,13024.0000,13078.5000,13074.0000,13114.0000,13079.0000,13048.5000,13074.0000,13109.0000,13078.5000,13054.0000,13023.5000,13074.0000,13079.0000,13088.5000,13028.5000,13034.0000,13113.5000,13119.0000,13119.0000,13089.0000,13048.5000,13014.0000,13094.0000,14561.5000,13124.0000,13053.5000,13049.0000,13018.5000,13094.0000,13074.0000,13058.5000,13054.0000,13023.5000,13083.5000,13094.0000,13074.0000,13053.5000,13039.0000" +normalizing a fully mergeable tiling of boxes - 2,"small, 
native",100,209,2361700,95.8095,95.7658,95.9441,0.3590,0.1403,0.7944,"95.7703,95.8660,99.0766,95.8182,95.6220,95.8660,95.5311,96.1053,95.6268,95.8182,95.7225,95.8182,95.5311,95.9139,95.6268,95.8660,95.6746,95.7703,95.9139,95.9617,95.4833,95.8660,95.6746,95.7703,95.7703,95.9617,95.5311,95.8660,95.7703,95.7703,95.7225,95.8182,95.7703,95.8660,95.7225,95.8660,95.6268,95.8182,95.6268,95.8182,95.7703,95.8660,95.6268,95.8182,95.7225,95.8182,95.5789,96.1053,95.5311,95.8660,95.7703,95.7225,95.9139,95.9617,95.5311,95.9139,95.7703,95.8182,95.6746,95.9617,95.7225,95.8660,95.6746,96.1053,95.6746,95.8182,95.7225,95.8660,95.5311,95.9617,95.6268,95.8182,95.6268,95.8660,95.5742,95.9617,95.5311,96.0574,95.6746,95.8660,95.7225,95.9617,95.8182,95.9617,95.6268,95.9139,95.5789,95.8660,95.6746,95.9139,95.6268,95.9617,95.6268,95.8182,95.6268,95.8660,95.5742,96.0574,95.6268,95.8182" +normalizing a fully mergeable tiling of boxes - 2,"small, embedded in 3d",100,197,2364000,124.8730,124.6473,125.4023,1.6695,0.8938,3.4519,"125.8629,124.5431,126.8274,124.2893,138.7310,125.5584,124.3350,124.3401,125.9645,124.1827,125.7614,126.0152,126.3706,126.1168,126.5787,123.4721,123.6244,125.0508,125.4569,125.8629,124.6447,123.4213,126.4213,123.9340,124.3350,125.4569,125.8122,124.3909,123.4213,124.2335,123.9340,125.3046,125.6091,125.1015,126.4213,124.1827,124.0863,124.1320,124.1878,124.0812,124.0355,122.7614,123.9797,124.0812,123.8832,125.0000,125.9137,124.6904,122.7614,125.1015,124.8985,123.9797,124.0863,124.2843,124.9492,123.5736,125.5584,124.9492,123.9289,124.4365,126.4213,123.3756,124.6904,124.2386,124.0812,124.3401,125.3553,123.2690,124.1320,125.8122,126.4264,125.4569,123.9289,124.0812,125.2995,124.0355,125.9137,125.0000,124.5888,125.7614,125.5025,125.9695,124.3858,125.0508,125.8629,125.5584,124.2893,123.3198,124.1320,124.0863,124.0305,123.9797,124.0305,123.9797,125.8122,124.2893,124.1827,124.7970,123.8782,125.0457" +normalizing a fully mergeable tiling of boxes - 2,"medium, 
native",100,28,2458400,897.3343,889.1018,904.0511,37.7822,31.2959,43.9926,"913.0714,913.8214,841.1429,812.8929,818.9643,816.8214,820.0357,819.3571,820.3929,821.1429,817.2143,818.2500,819.3571,818.9643,819.7143,819.3214,819.3571,812.1786,812.8929,815.0714,997.1786,913.0714,915.9643,917.3929,912.0000,912.3929,912.0000,910.9286,915.9643,916.2857,912.0357,912.0000,916.6429,919.5357,910.9643,906.6429,909.5000,912.0357,913.0714,910.6071,913.4286,917.0357,919.5357,913.8214,910.9286,914.8929,911.6429,912.7500,912.0000,915.9643,913.4643,910.2143,908.4286,913.8214,913.1071,912.3571,910.9643,912.3571,911.6786,916.6429,911.3214,908.4286,912.0357,910.9286,914.1786,919.1786,917.7143,914.8571,912.7500,906.6429,915.9643,919.5357,915.2143,917.0357,915.6071,910.9286,911.3214,915.5714,912.7500,918.1071,915.5714,920.9286,916.2857,914.5357,916.6786,911.2857,914.8929,912.0000,912.3929,910.9286,912.3929,912.0000,916.3214,912.0357,913.7857,908.8214,914.5000,911.6786,910.2143,914.8929" +normalizing a fully mergeable tiling of boxes - 2,"medium, embedded in 
3d",100,24,2407200,1027.9550,1025.8604,1035.6571,18.1782,4.4978,41.8259,"1022.7083,1030.6250,1061.5417,1028.0833,1028.0833,1016.8750,1022.2500,1026.8750,1030.6250,1028.9583,1025.6250,1023.9167,1017.2500,1023.1250,1027.6667,1023.5417,1022.7083,1028.9583,1033.1250,1020.1667,1030.2083,1025.6250,1023.1250,1027.7083,1022.2500,1016.4583,1022.2500,1028.5417,1026.4583,1026.4583,1028.5417,1033.1250,1024.7500,1030.2083,1028.1250,1023.1250,1025.2083,1028.5417,1028.5000,1019.3333,1026.4583,1024.3333,1027.2500,1027.7083,1029.3750,1018.1250,1022.6667,1029.3750,1029.7917,1031.8750,1031.4583,1031.4583,1019.3750,1024.3333,1023.1250,1029.3750,1029.7917,1200.5417,1024.3333,1023.5417,1026.0417,1033.9583,1032.7083,1024.3750,1016.0000,1020.2083,1016.8333,1025.2083,1025.2083,1027.6667,1024.3333,1021.8750,1027.2917,1015.5833,1026.4583,1025.1667,1027.2500,1032.7083,1027.2917,1032.2917,1021.8750,1023.1250,1020.5833,1026.4583,1030.6250,1027.2917,1026.4583,1024.7500,1023.9583,1026.8750,1024.3750,1023.5000,1026.8750,1025.2083,1032.2917,1031.4583,1028.5417,1026.0417,1026.4167,1024.7917" +normalizing a fully mergeable tiling of boxes - 2,"large, 
native",100,1,3627000,37302.1200,37222.2200,37505.7700,604.5848,231.5039,1201.1839,"37339.0000,37018.0000,40164.0000,38401.0000,37549.0000,37689.0000,37539.0000,37239.0000,37158.0000,37378.0000,37178.0000,37129.0000,37328.0000,37099.0000,37239.0000,37248.0000,37259.0000,37449.0000,37209.0000,37148.0000,36808.0000,37018.0000,37119.0000,37289.0000,37198.0000,37359.0000,37169.0000,37268.0000,37348.0000,37399.0000,37029.0000,37118.0000,37409.0000,37149.0000,37298.0000,37048.0000,37279.0000,37248.0000,37149.0000,37148.0000,36898.0000,36969.0000,37158.0000,37399.0000,37209.0000,37178.0000,37199.0000,37329.0000,37289.0000,37238.0000,37479.0000,37109.0000,37419.0000,37068.0000,37249.0000,37229.0000,37359.0000,37589.0000,37249.0000,37048.0000,37239.0000,37238.0000,37319.0000,37059.0000,37048.0000,37058.0000,37219.0000,37409.0000,37199.0000,37238.0000,37299.0000,37229.0000,37288.0000,36898.0000,36928.0000,36968.0000,37028.0000,37139.0000,37278.0000,37229.0000,36908.0000,37299.0000,42188.0000,37128.0000,37168.0000,37179.0000,37038.0000,37249.0000,37088.0000,37469.0000,36848.0000,37319.0000,37148.0000,37128.0000,37179.0000,37238.0000,37269.0000,37108.0000,37268.0000,37409.0000" +normalizing a fully mergeable tiling of boxes - 2,"large, embedded in 
3d",100,1,3948000,37847.0700,37757.5600,38095.4200,683.7265,158.6699,1416.7781,"37659.0000,37550.0000,43600.0000,38460.0000,37950.0000,38000.0000,37809.0000,37750.0000,37670.0000,37859.0000,37770.0000,37559.0000,37550.0000,37529.0000,37870.0000,37870.0000,37599.0000,37599.0000,37760.0000,37990.0000,37790.0000,37800.0000,37779.0000,37740.0000,37720.0000,37719.0000,37750.0000,37649.0000,37780.0000,37800.0000,37639.0000,37640.0000,37950.0000,37709.0000,37570.0000,37589.0000,37599.0000,37489.0000,37619.0000,37740.0000,37719.0000,37650.0000,37729.0000,37890.0000,37850.0000,37760.0000,37679.0000,37660.0000,37779.0000,37830.0000,37610.0000,37729.0000,37830.0000,37730.0000,37779.0000,37750.0000,37810.0000,37779.0000,37749.0000,37650.0000,37869.0000,37719.0000,37990.0000,37740.0000,37739.0000,37779.0000,37960.0000,37890.0000,37910.0000,37579.0000,37710.0000,37429.0000,37800.0000,37759.0000,37639.0000,37639.0000,37670.0000,37850.0000,37759.0000,37940.0000,37680.0000,37800.0000,37569.0000,37930.0000,37880.0000,37870.0000,37639.0000,37940.0000,37720.0000,37649.0000,37610.0000,37809.0000,37920.0000,37780.0000,37900.0000,37870.0000,37790.0000,37639.0000,41146.0000,37760.0000" +normalizing a fully mergeable tiling of boxes - 3,"small, 
native",100,97,2386200,217.2029,216.6489,219.7951,5.1628,0.2225,12.2528,"216.1546,216.4742,222.3608,216.1546,216.6701,216.5773,216.7835,216.5773,216.6804,216.3711,216.7835,216.2680,216.7835,216.6804,216.8866,216.4742,216.7835,216.7835,216.6804,216.8866,216.5670,216.8866,216.5773,216.8866,216.7835,216.7835,216.6804,216.7835,216.6804,216.5773,216.9897,216.5773,216.8866,216.6804,216.8866,216.6804,216.7835,216.5773,216.8866,216.4639,216.9794,216.4639,216.5773,216.9897,216.5773,216.8866,216.5773,216.7835,216.6804,216.6804,216.7835,216.7835,216.6804,216.7835,216.6804,216.4742,216.9897,216.5773,216.8866,216.5670,216.7732,216.7835,216.7835,216.7835,216.7835,216.6804,216.7835,216.6804,216.4742,216.9897,216.5773,216.8866,216.5773,216.7835,216.6804,216.6804,216.7835,268.2165,216.3711,216.6804,216.3711,216.3711,216.6804,216.3608,216.5670,216.3711,216.5773,216.2680,216.3711,216.2680,216.3711,216.2680,216.3608,216.2680,216.1649,216.5773,216.2680,216.4742,216.2680,216.4742" +normalizing a fully mergeable tiling of boxes - 3,"medium, 
native",100,17,2402100,1477.8576,1474.4135,1491.4176,29.7937,3.1268,68.9526,"1474.4118,1476.2353,1550.4706,1472.7059,1474.4706,1475.5882,1478.5882,1473.8824,1476.1765,1479.1765,1469.1765,1475.0000,1480.3529,1475.0588,1475.0000,1476.2353,1472.7059,1473.2353,1469.7647,1477.9412,1475.0588,1475.6471,1476.1765,1475.0588,1477.4118,1464.4118,1476.8235,1473.8235,1478.5882,1476.2353,1476.1765,1474.4706,1471.5294,1465.0000,1475.0000,1476.7647,1472.1176,1475.5882,1476.8235,1470.3529,1468.5294,1475.6471,1476.1765,1473.2941,1471.5294,1472.6471,1472.1176,1465.5882,1475.6471,1477.9412,1472.1176,1475.0588,1763.1765,1476.1765,1473.2941,1475.5882,1474.4706,1477.4118,1472.6471,1475.0588,1466.1765,1476.8235,1472.1176,1472.0588,1475.6471,1474.4118,1473.8824,1475.0588,1473.8235,1471.5294,1474.4118,1471.5294,1474.4706,1471.4706,1473.8824,1474.4118,1472.7059,1475.6471,1469.1176,1475.0588,1475.0000,1476.8235,1473.2941,1474.4118,1475.0588,1475.5882,1476.1765,1474.4706,1475.5882,1476.1765,1476.2353,1474.4118,1473.2353,1475.6471,1475.5882,1476.2353,1473.2941,1473.8235,1474.4706,1472.6471" +normalizing a fully mergeable tiling of boxes - 3,"large, 
native",100,1,4494400,45351.9600,45078.0700,45856.1500,1842.9638,1212.1758,3270.5682,"44633.0000,44572.0000,58048.0000,51726.0000,49262.0000,47378.0000,45975.0000,48820.0000,48540.0000,47177.0000,46446.0000,45394.0000,47417.0000,46215.0000,46096.0000,49021.0000,47348.0000,46626.0000,45945.0000,45765.0000,45364.0000,45054.0000,44843.0000,44773.0000,44933.0000,44623.0000,44993.0000,45595.0000,46556.0000,48430.0000,47548.0000,45715.0000,45184.0000,44973.0000,44883.0000,44533.0000,44793.0000,44933.0000,44492.0000,44652.0000,44653.0000,44713.0000,44783.0000,44673.0000,44742.0000,44542.0000,44332.0000,44422.0000,44402.0000,44753.0000,44542.0000,44403.0000,44532.0000,44382.0000,44523.0000,44372.0000,44603.0000,44462.0000,44452.0000,44483.0000,44502.0000,44453.0000,44542.0000,44613.0000,44272.0000,44552.0000,44282.0000,44333.0000,44191.0000,44632.0000,44483.0000,44562.0000,44422.0000,44602.0000,44352.0000,44423.0000,44342.0000,44592.0000,44513.0000,44522.0000,44763.0000,44593.0000,47608.0000,44944.0000,44512.0000,44733.0000,44903.0000,44753.0000,44603.0000,44753.0000,44382.0000,44512.0000,44552.0000,44923.0000,44683.0000,44462.0000,44402.0000,44642.0000,44502.0000,44743.0000" +performing set operations between randomized regions - 2d,"union, small, 
native",100,27,2419200,908.9141,906.9744,914.7152,15.5255,3.8205,33.5608,"907.9259,908.3333,971.4074,911.6667,908.2963,908.3333,908.6667,910.1852,908.3333,904.9630,901.6296,904.2593,906.8148,906.4815,911.2963,908.6667,909.4444,908.2963,911.2963,909.0741,904.9630,908.7037,906.4444,902.7778,907.5556,912.0370,909.4444,903.4815,904.2222,904.2593,910.1481,909.8148,906.1111,906.8148,908.7037,904.5926,910.5556,910.5556,908.2963,903.1481,906.4444,906.4815,906.8148,1045.2593,910.8889,909.8148,910.1852,897.5556,908.6667,907.5926,904.9630,902.7778,905.7037,903.1111,905.3333,905.3333,908.7037,909.0741,902.7407,896.0741,909.4444,909.0370,909.8148,907.1852,905.3704,910.5556,905.3333,903.8519,909.8148,909.8148,908.6667,909.0741,896.0741,910.5556,911.2593,910.1852,906.1111,904.9630,904.5926,898.2963,902.7407,906.8148,907.9630,906.4444,906.1111,902.7407,909.0741,909.0370,905.7037,904.9630,908.7037,899.7778,902.3704,907.1852,909.0741,913.4815,909.7778,911.2963,897.9259,908.7037" +performing set operations between randomized regions - 2d,"union, small, embedded in 
3d",100,24,2416800,1043.3275,1042.3742,1046.1925,7.7166,3.1772,16.8912,"1043.1667,1038.9583,1112.4583,1054.8333,1050.6667,1051.9167,1050.6667,1047.7083,1045.2500,1043.1667,1048.1250,1046.0833,1049.0000,1042.7083,1041.8750,1039.7917,1043.1667,1043.5833,1045.2083,1044.0000,1046.4583,1043.9583,1043.5833,1043.1250,1041.0833,1043.1250,1044.0000,1043.9583,1040.6250,1041.4583,1044.7917,1037.2917,1044.4167,1043.1667,1042.2917,1040.6667,1043.5417,1045.2500,1039.3750,1037.2917,1041.4583,1038.9583,1043.5833,1044.8333,1042.7083,1042.3333,1038.9583,1041.0417,1043.1667,1044.4167,1042.7083,1041.5000,1041.8750,1037.2917,1043.5417,1048.1667,1043.5417,1041.5000,1040.2083,1044.8333,1042.2917,1041.4583,1040.6250,1040.6250,1044.0000,1046.0417,1042.3333,1041.4583,1036.5000,1040.6250,1041.8750,1046.4583,1044.0000,1038.9583,1040.6667,1035.6250,1041.4583,1044.4167,1045.2500,1043.5417,1040.6667,1035.6250,1035.6250,1038.5417,1041.9167,1044.8333,1041.8750,1043.1667,1040.6250,1038.1250,1039.8333,1045.2083,1043.5417,1038.9583,1039.7917,1042.7083,1045.2500,1038.1250,1041.9167,1039.7917" +performing set operations between randomized regions - 2d,"intersection, small, 
native",100,104,2371200,240.3737,240.0641,241.4552,2.6208,0.7227,5.9321,"239.8558,239.8654,246.5096,239.7596,241.5962,241.7885,241.5000,240.1442,238.7019,240.9231,239.7596,241.0192,239.6635,240.4423,240.8173,239.5769,240.3365,240.3462,240.4327,240.7308,240.4327,240.7308,240.5288,239.7692,240.3365,240.0577,239.7596,240.6250,239.8558,239.4712,239.4712,240.1442,240.1442,240.3462,240.3365,240.7308,240.1442,239.0962,240.0481,241.1154,240.4327,240.5385,240.1442,240.2500,239.9519,240.6346,239.6635,240.9231,240.0481,240.3365,239.9519,240.5288,239.1923,240.5288,240.2500,240.3365,240.4423,240.3365,241.0192,238.7981,240.1442,240.8269,240.8269,239.9519,240.2404,239.9615,238.6058,240.0481,240.6346,239.7596,240.2500,239.7596,240.6346,264.8077,238.7019,239.7692,240.1442,240.5385,239.8558,239.9519,239.9615,238.4135,240.1442,239.7692,239.5673,240.1538,239.5673,240.1442,237.8365,240.0577,239.2788,239.8558,239.1827,239.9519,239.4712,239.0000,238.9904,239.7596,239.9615,239.6635" +performing set operations between randomized regions - 2d,"intersection, small, embedded in 
3d",100,94,2378200,248.5362,247.9887,249.9421,4.1947,2.0605,8.6655,"255.5745,255.2447,255.2447,247.6809,247.6809,247.4681,247.3617,247.2553,247.6809,247.5745,247.5745,247.2553,247.4681,247.7872,247.3617,247.3617,247.3617,247.6809,247.3617,247.2553,247.3617,248.0000,247.2553,247.3617,247.4681,247.3617,247.5745,247.4681,247.4681,247.3723,247.5745,247.3617,247.7872,247.3617,247.5851,247.3617,247.5745,247.3617,247.4681,247.6915,247.3617,247.3617,247.4681,248.0000,247.2660,247.2553,247.3617,247.6809,247.2553,247.5851,247.2553,247.4681,247.5745,247.3617,247.9043,247.2553,247.5745,247.2553,247.4681,247.2553,247.5745,247.3617,247.5745,247.4681,247.2553,247.6915,247.4681,247.3617,247.2553,247.7872,247.3723,247.4681,247.2553,247.7872,247.2553,247.3723,247.5745,247.4681,247.5745,247.3617,247.3723,247.4681,247.6809,247.3617,247.5745,247.2553,247.4681,247.4681,247.4681,247.4681,247.3617,247.4681,247.5745,283.7128,255.4574,255.6809,256.3191,253.2234,255.3617,255.5638" +performing set operations between randomized regions - 2d,"difference, small, 
native",100,24,2460000,957.0421,954.1229,964.5787,22.2451,10.7335,45.2887,"954.6250,953.8333,1012.7083,952.9583,950.4583,949.2500,955.4583,952.5833,955.8750,958.4167,956.3333,958.7917,955.0833,958.0000,955.8750,953.8333,956.7083,958.0000,955.9167,957.1250,955.0833,958.0000,952.1250,955.9167,958.3750,955.9167,952.5417,956.7500,954.6250,957.9583,953.4167,957.1250,954.6667,954.2083,956.3333,954.6250,957.1250,1037.7083,949.6250,949.2083,946.7083,949.6667,949.2083,948.3750,948.7917,950.0833,947.5417,1140.0000,947.1250,948.7917,949.2500,951.2917,951.7083,948.8333,949.2083,949.6250,950.5000,950.8750,948.7917,950.5000,950.0417,950.4583,950.9167,949.2083,950.4583,950.0833,950.4583,950.0417,949.6667,949.6250,951.2917,947.9583,951.3333,950.4583,949.2083,948.0000,949.2083,948.7917,950.0833,1018.5000,956.3333,954.6250,957.1667,953.3750,954.6667,955.4583,956.7500,954.6250,955.4583,956.3333,955.0417,955.5000,957.5417,955.5000,956.3333,954.2083,959.6667,957.5833,957.5417,954.6667" +performing set operations between randomized regions - 2d,"difference, small, embedded in 
3d",100,21,2457000,1260.0714,1254.8905,1273.9805,39.8467,18.0604,83.6438,"1250.3810,1251.8095,1598.1429,1357.2381,1341.9524,1294.7619,1290.4286,1277.0952,1279.4762,1295.2381,1263.2381,1263.7619,1275.6667,1258.9524,1259.4286,1253.7143,1249.9048,1257.0476,1264.7143,1250.8095,1250.3333,1251.3333,1246.5714,1250.8571,1247.0476,1258.4762,1244.1429,1247.9524,1250.8571,1252.2857,1252.7619,1249.4286,1241.2857,1244.6190,1246.5714,1281.3810,1374.4286,1249.4286,1259.4286,1248.0000,1246.0476,1250.3810,1251.8095,1258.4762,1248.0000,1259.4286,1251.3333,1236.0952,1246.5238,1258.0476,1266.5714,1247.0476,1248.9524,1247.4762,1245.6190,1248.9524,1247.5238,1250.3810,1246.5238,1254.1905,1258.5238,1250.3333,1253.2381,1246.0952,1251.3333,1248.9524,1238.4286,1250.8571,1241.8095,1239.8571,1261.3333,1248.9048,1251.3333,1250.3810,1254.1905,1254.6667,1254.6667,1257.0476,1254.6667,1255.1429,1245.0952,1261.3810,1253.7143,1247.9524,1256.0952,1246.5238,1241.8095,1250.8571,1242.2381,1251.3333,1248.9524,1260.8571,1252.2857,1243.2381,1248.4762,1238.4286,1250.8571,1251.3333,1249.9048,1253.7143" +performing set operations between randomized regions - 2d,"union, medium, 
native",100,2,2629400,13520.1150,13481.1800,13619.6100,289.3996,72.0145,556.1872,"13529.5000,13610.0000,15809.0000,13790.5000,13574.5000,13620.0000,13504.5000,13570.0000,13499.5000,13509.5000,13560.0000,13559.5000,13595.0000,13424.5000,13454.5000,13494.5000,13379.5000,13560.0000,13424.5000,13464.5000,13394.5000,13489.5000,13414.5000,13454.5000,13519.5000,13474.5000,13474.5000,13439.5000,13454.5000,13429.5000,13424.5000,13449.5000,13309.5000,13474.5000,13449.5000,13544.5000,13444.5000,13429.5000,13534.5000,13434.5000,13394.5000,13539.5000,13650.0000,13540.0000,13524.5000,13444.5000,13509.5000,13389.5000,13479.5000,13449.5000,13449.5000,13444.5000,13489.5000,13489.5000,13464.5000,13460.0000,13459.5000,13424.5000,13494.5000,13444.5000,13504.5000,13369.5000,13504.5000,13475.0000,13549.5000,13529.5000,13500.0000,13459.5000,13429.5000,13424.5000,13559.5000,13434.5000,13475.0000,13444.5000,13459.5000,15117.5000,13515.0000,13549.5000,13384.5000,13469.5000,13469.5000,13485.0000,13489.5000,13544.5000,13495.0000,13449.5000,13394.5000,13479.5000,13439.5000,13449.5000,13399.5000,13494.5000,13454.5000,13434.5000,13474.5000,13494.5000,13494.5000,13575.0000,13414.5000,13409.5000" +performing set operations between randomized regions - 2d,"union, medium, embedded in 
3d",100,2,2940000,13414.2500,13388.9800,13510.2500,221.7893,51.6611,514.1032,"13354.0000,13339.5000,15543.5000,13740.0000,13570.0000,13439.5000,13359.0000,13485.0000,13394.0000,13475.0000,13379.0000,13379.5000,13334.5000,13374.5000,13374.5000,13379.0000,13324.5000,13434.5000,13449.5000,13354.5000,13399.5000,13394.5000,13424.5000,13419.5000,13384.0000,13404.5000,13369.5000,13364.5000,13449.5000,13394.5000,13429.5000,13429.5000,13374.5000,13399.0000,13389.0000,13359.5000,13399.5000,13374.5000,13484.5000,13369.5000,13369.5000,13404.0000,13399.0000,13399.5000,13404.5000,13394.5000,13419.5000,13414.5000,13359.5000,13349.0000,13354.5000,13384.5000,13424.5000,13394.5000,13404.5000,13349.0000,13364.0000,13329.5000,13354.5000,13374.0000,13419.5000,13284.5000,13409.5000,13399.5000,13304.0000,13419.5000,13324.5000,13339.0000,13384.5000,13299.5000,13424.5000,13389.5000,13384.0000,13459.5000,13429.5000,13379.0000,13374.5000,13449.5000,13464.5000,13324.5000,13394.5000,13394.5000,13294.0000,13339.5000,13489.5000,13434.5000,13409.5000,13344.5000,13399.5000,13414.5000,13384.5000,13344.0000,13404.5000,13454.5000,13364.5000,13414.5000,13384.5000,13339.0000,13274.0000,13339.5000" +performing set operations between randomized regions - 2d,"intersection, medium, 
native",100,11,2423300,2269.8509,2265.6464,2286.1873,38.0615,6.0567,89.6124,"2248.6364,2274.1818,2308.7273,2275.9091,2275.0000,2268.6364,2264.1818,2265.0000,2270.5455,2274.0909,2267.7273,2275.9091,2270.4545,2259.5455,2262.2727,2265.9091,2262.3636,2642.0909,2265.9091,2255.0000,2276.9091,2269.5455,2269.6364,2264.0909,2268.6364,2271.3636,2268.6364,2266.9091,2268.6364,2267.8182,2274.0909,2263.1818,2266.0000,2266.8182,2268.7273,2258.6364,2266.8182,2269.6364,2270.4545,2270.4545,2270.4545,2268.6364,2272.3636,2269.5455,2258.6364,2261.3636,2266.8182,2258.6364,2265.0909,2265.0000,2266.8182,2257.8182,2260.4545,2259.5455,2259.5455,2270.5455,2265.9091,2268.6364,2277.8182,2273.2727,2274.0909,2262.2727,2259.6364,2268.6364,2265.9091,2265.0909,2258.6364,2256.8182,2253.1818,2266.9091,2262.2727,2259.5455,2257.7273,2269.6364,2269.5455,2272.3636,2268.6364,2260.4545,2258.6364,2266.9091,2266.8182,2265.0909,2262.2727,2264.0909,2255.9091,2266.0000,2269.5455,2261.3636,2256.8182,2263.1818,2263.1818,2264.0909,2272.3636,2268.6364,2262.3636,2259.5455,2263.1818,2263.1818,2262.2727,2262.2727" +performing set operations between randomized regions - 2d,"intersection, medium, embedded in 
3d",100,11,2484900,2275.0927,2274.1255,2276.6136,6.0670,4.2338,10.6362,"2281.3636,2277.7273,2316.0000,2286.0000,2288.7273,2281.3636,2285.0909,2279.6364,2276.8182,2266.0000,2275.9091,2274.1818,2271.3636,2272.3636,2274.0909,2275.0909,2278.6364,2275.0909,2271.3636,2274.1818,2272.2727,2277.8182,2278.6364,2282.2727,2275.0909,2270.4545,2265.0000,2272.2727,2273.1818,2274.0909,2278.7273,2272.2727,2268.7273,2275.0000,2270.4545,2278.6364,2272.3636,2271.3636,2276.0000,2270.4545,2270.5455,2273.1818,2272.3636,2265.9091,2274.0909,2271.3636,2274.0909,2275.0000,2280.5455,2279.5455,2270.5455,2275.9091,2275.0909,2278.6364,2275.0000,2273.2727,2275.9091,2270.5455,2275.9091,2279.6364,2272.2727,2264.1818,2273.1818,2277.8182,2272.2727,2270.5455,2272.2727,2275.0909,2275.0000,2272.3636,2268.6364,2280.5455,2277.7273,2273.2727,2274.0909,2276.0000,2274.0909,2273.1818,2265.9091,2281.4545,2270.4545,2279.5455,2277.8182,2273.1818,2272.3636,2269.5455,2276.9091,2280.4545,2271.4545,2277.7273,2275.0909,2273.1818,2275.9091,2276.9091,2276.8182,2262.2727,2278.6364,2276.0000,2273.1818,2278.7273" +performing set operations between randomized regions - 2d,"difference, medium, 
native",100,3,2458800,7684.2733,7658.5333,7750.1333,184.6710,23.9588,334.8667,"7687.3333,7687.3333,8956.3333,7777.3333,7700.6667,7664.0000,7690.6667,7700.6667,7650.3333,7664.0000,7664.0000,7670.3333,7664.0000,7680.6667,7667.0000,7677.0000,7640.6667,7667.0000,7637.0000,7640.6667,7640.3333,7630.6667,7663.6667,7630.6667,7617.0000,7654.0000,7637.0000,7634.0000,7647.3333,7640.3333,7647.3333,7673.6667,7650.6667,7617.0000,7684.0000,7654.0000,7657.0000,7657.0000,7637.3333,7653.6667,7670.6667,7637.0000,7643.6667,7630.6667,7637.0000,7617.3333,7663.6667,7647.3333,7674.0000,7650.3333,7674.0000,7674.0000,7687.0000,7677.3333,7654.0000,7674.0000,7657.0000,7634.0000,7683.6667,7643.6667,7654.0000,7673.6667,7650.6667,7660.6667,7660.3333,7684.0000,7690.6667,8976.3333,7667.3333,7670.6667,7653.6667,7667.3333,7640.3333,7627.0000,7667.3333,7660.3333,7640.6667,7664.0000,7630.3333,7660.6667,7640.3333,7664.0000,7633.6667,7667.0000,7657.3333,7680.6667,7640.3333,7650.6667,7677.0000,7627.0000,7677.3333,7627.0000,7694.0000,7680.6667,7623.6667,7617.3333,7643.6667,7647.3333,7700.6667,7633.6667" +performing set operations between randomized regions - 2d,"difference, medium, embedded in 
3d",100,3,2568900,8776.5733,8765.0167,8817.5233,99.6327,28.7624,227.9856,"8769.0000,8752.3333,9721.0000,8836.3333,8776.0000,8799.3333,8769.3333,8796.0000,8796.0000,8769.3333,8779.3333,8762.6667,8749.3333,8786.0000,8782.6667,8732.6667,8742.6667,8752.6667,8752.3333,8769.0000,8745.6667,8792.6667,8739.0000,8742.6667,8832.6667,8755.6667,8696.0000,8752.6667,8762.6667,8802.6667,8789.3333,8739.3333,8799.3333,8756.0000,8796.0000,8812.6667,8776.0000,8776.0000,8806.0000,8786.0000,8755.6667,8799.3333,8782.6667,8776.0000,8759.3333,8762.6667,8792.6667,8749.3333,8725.6667,8813.0000,8769.3333,8802.6667,8756.0000,8749.0000,8792.6667,8776.0000,8772.6667,8769.3333,8796.0000,8782.6667,8806.0000,8776.0000,8749.3333,8772.6667,8742.6667,8739.3333,8792.6667,8735.6667,8762.3333,8742.6667,8722.6667,8776.0000,8792.6667,8739.3333,8732.6667,8799.3333,8722.6667,8782.6667,8762.6667,8812.6667,8655.6667,8779.3333,8772.6667,8776.0000,8732.6667,8746.0000,8802.6667,8789.3333,8766.0000,8799.3333,8799.3333,8722.6667,8776.0000,8726.0000,8769.3333,8729.0000,8709.0000,8776.0000,8749.3333,8682.6667" +performing set operations between randomized regions - 2d,"union, large, 
native",100,1,15856300,158790.4200,158174.4300,159430.9500,3202.1340,2594.6242,4262.0851,"153048.0000,152567.0000,165762.0000,160833.0000,159290.0000,159150.0000,159921.0000,159541.0000,159130.0000,158538.0000,162937.0000,159531.0000,159480.0000,160031.0000,159401.0000,159670.0000,159411.0000,159901.0000,159440.0000,159460.0000,159610.0000,158499.0000,159670.0000,159401.0000,159440.0000,159270.0000,159651.0000,159711.0000,159219.0000,158879.0000,159009.0000,159501.0000,159240.0000,159650.0000,159431.0000,173096.0000,159711.0000,158849.0000,159200.0000,159620.0000,159691.0000,159981.0000,159169.0000,159521.0000,159019.0000,159871.0000,160423.0000,159670.0000,159641.0000,158909.0000,159962.0000,159941.0000,159480.0000,158809.0000,159490.0000,160242.0000,159861.0000,159580.0000,159922.0000,159059.0000,163268.0000,159380.0000,159290.0000,159931.0000,158929.0000,159320.0000,159831.0000,160042.0000,159560.0000,159721.0000,159891.0000,159911.0000,159320.0000,159099.0000,159190.0000,159170.0000,160092.0000,159240.0000,159149.0000,159882.0000,159540.0000,159521.0000,159350.0000,159931.0000,159511.0000,168617.0000,152918.0000,153109.0000,152607.0000,152858.0000,152427.0000,152657.0000,152598.0000,152767.0000,153058.0000,152978.0000,152306.0000,152558.0000,153148.0000,152427.0000" +performing set operations between randomized regions - 2d,"union, large, embedded in 
3d",100,1,16929000,171597.9000,171439.2800,171864.1700,1027.7852,669.2869,1464.3804,"171122.0000,171313.0000,176443.0000,172835.0000,172354.0000,171353.0000,171523.0000,171494.0000,171553.0000,175400.0000,171473.0000,171663.0000,171714.0000,172094.0000,171664.0000,171122.0000,171283.0000,171373.0000,170792.0000,171373.0000,171192.0000,171272.0000,171483.0000,171653.0000,171403.0000,171153.0000,170952.0000,171924.0000,171713.0000,171103.0000,171473.0000,171413.0000,176533.0000,171212.0000,171043.0000,171212.0000,171453.0000,171302.0000,171263.0000,171663.0000,171362.0000,171664.0000,171603.0000,171173.0000,171363.0000,171162.0000,170912.0000,171153.0000,171643.0000,171433.0000,171634.0000,171132.0000,171413.0000,170782.0000,171063.0000,172615.0000,175430.0000,171583.0000,171393.0000,171052.0000,171764.0000,171122.0000,172014.0000,171484.0000,171202.0000,170922.0000,171203.0000,171734.0000,171503.0000,171453.0000,170742.0000,171423.0000,171252.0000,171073.0000,170972.0000,171683.0000,171133.0000,171683.0000,171263.0000,175080.0000,171333.0000,171483.0000,171533.0000,171183.0000,171343.0000,171102.0000,170912.0000,171323.0000,171563.0000,171132.0000,170812.0000,171002.0000,171804.0000,171303.0000,171463.0000,171363.0000,171202.0000,171423.0000,170812.0000,171073.0000" +performing set operations between randomized regions - 2d,"intersection, large, 
native",100,2,4052400,18786.4300,18750.4000,18886.3150,276.1694,60.8825,572.3729,"18739.5000,18739.5000,20122.5000,18950.0000,18844.5000,18799.5000,18834.5000,18870.0000,18834.5000,18754.5000,18830.0000,18724.5000,18799.5000,18785.0000,18829.5000,18784.5000,18765.0000,18779.5000,18764.5000,18779.5000,18750.0000,18669.0000,18875.0000,18799.5000,18739.5000,18704.5000,18754.5000,18699.5000,18739.5000,18764.5000,18684.5000,18880.0000,18724.5000,18754.5000,18724.5000,18709.5000,18709.5000,18649.5000,18729.5000,18744.5000,18744.5000,18775.0000,18749.5000,18644.5000,18689.0000,18719.5000,18779.5000,18789.5000,18714.5000,18749.5000,18659.5000,18729.5000,18709.5000,18770.0000,18714.5000,18744.5000,18749.5000,18659.5000,18654.5000,18689.5000,18734.5000,18819.5000,18784.5000,18714.5000,18689.0000,18619.0000,18734.5000,18760.0000,18739.5000,18759.5000,18819.5000,18704.5000,18775.0000,18764.5000,18779.5000,18759.5000,18724.5000,18784.5000,21109.0000,18744.5000,18880.0000,18754.5000,18724.5000,18714.5000,18699.5000,18794.5000,18745.0000,18784.5000,18684.5000,18759.5000,18729.5000,18744.5000,18760.0000,18669.0000,18755.0000,18709.5000,18659.5000,18714.5000,18689.5000,18779.5000" +performing set operations between randomized regions - 2d,"intersection, large, embedded in 
3d",100,2,4155800,21257.0850,21229.8700,21322.6700,201.4872,65.1996,370.6483,"21234.0000,21139.0000,22411.5000,21324.5000,21349.5000,21219.5000,21299.5000,21224.0000,21329.5000,21114.0000,21269.5000,21274.5000,21299.5000,21139.0000,21284.5000,21169.0000,21254.5000,21119.0000,21184.0000,21239.5000,21189.0000,21289.5000,21269.5000,21319.5000,21194.0000,21289.5000,21259.5000,21309.5000,21209.0000,21229.5000,21199.0000,21129.0000,21349.5000,21219.0000,21269.5000,21219.0000,21279.5000,21124.0000,21219.5000,21204.0000,21204.5000,21184.0000,21189.5000,21224.0000,21194.5000,21259.0000,21159.5000,21344.5000,21119.0000,21314.5000,21174.0000,22757.5000,21209.0000,21334.5000,21194.5000,21274.0000,21179.5000,21324.5000,21204.0000,21294.5000,21234.5000,21279.0000,21264.5000,21154.0000,21284.5000,21244.5000,21319.5000,21164.0000,21279.5000,21149.0000,21224.5000,21204.0000,21189.5000,21229.0000,21354.5000,21169.5000,21179.0000,21219.5000,21149.0000,21299.5000,21134.0000,21204.0000,21199.5000,21269.5000,21184.0000,21269.5000,21139.0000,21189.0000,21194.5000,21254.5000,21174.0000,21269.5000,21259.0000,21044.0000,21299.5000,21184.5000,21344.5000,21254.0000,21244.5000,21219.0000" +performing set operations between randomized regions - 2d,"difference, large, 
native",100,1,63392800,637186.5400,635742.5800,638391.8500,6731.0094,5644.1086,7866.7962,"627698.0000,621025.0000,650592.0000,639670.0000,640562.0000,638709.0000,641073.0000,638148.0000,640121.0000,640052.0000,638348.0000,642966.0000,638578.0000,641795.0000,644910.0000,638739.0000,636444.0000,638889.0000,634852.0000,640452.0000,644469.0000,641615.0000,639460.0000,636594.0000,637246.0000,636665.0000,645131.0000,639971.0000,642436.0000,639340.0000,638719.0000,643819.0000,643788.0000,644630.0000,634812.0000,641393.0000,637667.0000,639220.0000,638448.0000,645091.0000,639600.0000,642636.0000,636645.0000,640031.0000,638317.0000,643859.0000,636304.0000,640212.0000,638177.0000,641805.0000,640021.0000,639741.0000,642035.0000,638739.0000,637787.0000,635923.0000,642316.0000,638027.0000,642065.0000,644891.0000,642596.0000,637717.0000,639821.0000,636475.0000,642305.0000,638649.0000,638979.0000,638198.0000,643177.0000,637817.0000,639580.0000,635673.0000,635112.0000,636174.0000,641394.0000,633429.0000,636895.0000,638128.0000,638217.0000,639070.0000,641063.0000,638228.0000,639219.0000,642747.0000,645261.0000,624923.0000,623029.0000,625243.0000,622048.0000,634891.0000,621506.0000,625213.0000,621597.0000,625253.0000,621766.0000,624041.0000,629241.0000,619232.0000,619843.0000,621636.0000" +performing set operations between randomized regions - 2d,"difference, large, embedded in 
3d",100,1,67525800,671807.4600,669864.1200,673640.3400,9628.9044,8960.9860,10423.3460,"680348.0000,691629.0000,666591.0000,659789.0000,659228.0000,660530.0000,668675.0000,659228.0000,658406.0000,659909.0000,658195.0000,661603.0000,665339.0000,655330.0000,658266.0000,655681.0000,660841.0000,660580.0000,665580.0000,660901.0000,658516.0000,660019.0000,661302.0000,658666.0000,671751.0000,659969.0000,661932.0000,659118.0000,658877.0000,659488.0000,666361.0000,659137.0000,659338.0000,658486.0000,657935.0000,657675.0000,667543.0000,659218.0000,661602.0000,658526.0000,660831.0000,660530.0000,668816.0000,683784.0000,678925.0000,678775.0000,677803.0000,680989.0000,683554.0000,677071.0000,677232.0000,678344.0000,678845.0000,678043.0000,683854.0000,677793.0000,676751.0000,677793.0000,677702.0000,679927.0000,682722.0000,676811.0000,677402.0000,679336.0000,678384.0000,680438.0000,682852.0000,677733.0000,676921.0000,677813.0000,677272.0000,682101.0000,682522.0000,676901.0000,677362.0000,679817.0000,678334.0000,677472.0000,681881.0000,678764.0000,677302.0000,679035.0000,675869.0000,678955.0000,683604.0000,677242.0000,676440.0000,678524.0000,681559.0000,684937.0000,677612.0000,679987.0000,676480.0000,678113.0000,676951.0000,688413.0000,671301.0000,680608.0000,678695.0000,676811.0000" +performing set operations between randomized regions - 3d,"union, small, 
native",100,7,2651600,3955.0429,3882.8514,4301.7800,691.1705,13.2872,1647.1789,"3881.2857,3880.0000,4296.4286,3907.1429,3898.5714,3885.7143,3895.5714,3895.5714,3878.5714,3879.8571,3908.4286,3884.2857,3888.5714,3897.1429,3888.4286,3874.2857,3882.7143,3895.7143,3884.2857,3878.4286,3891.4286,3894.2857,3902.8571,3888.5714,3901.2857,3858.5714,3889.8571,3907.1429,3885.7143,3881.4286,3877.0000,3878.5714,3879.8571,3901.4286,3905.7143,3874.2857,3892.7143,3875.5714,3885.7143,10818.7143,3892.8571,3868.5714,3895.5714,3875.7143,3892.8571,3875.5714,3891.4286,3891.4286,3874.1429,3865.7143,3874.1429,3861.2857,3867.0000,3855.5714,3872.8571,3872.7143,3891.4286,3881.2857,3884.1429,3857.0000,3894.2857,3878.5714,3878.4286,3871.4286,3877.0000,3882.8571,3901.4286,3884.1429,3867.0000,3872.7143,3892.8571,3860.0000,3872.7143,3875.7143,3889.8571,3865.7143,3881.2857,3875.7143,3881.2857,3868.4286,3885.7143,3872.7143,3895.7143,3862.7143,3882.8571,3884.1429,3874.1429,3864.1429,3891.4286,3854.1429,3877.1429,3862.7143,3887.1429,3865.5714,3874.2857,3877.0000,3892.7143,3878.5714,3881.2857,3855.7143" +performing set operations between randomized regions - 3d,"intersection, small, 
native",100,169,2366000,148.5298,148.2103,149.6789,2.7903,0.6958,6.4503,"148.6154,148.6095,149.1479,148.0237,148.3728,148.7337,149.1479,149.2663,149.2071,148.4911,148.4320,149.2663,149.3254,149.0888,148.1953,148.3136,148.9704,147.8402,147.1953,148.9645,147.9586,147.8994,146.1775,148.2604,149.0296,149.2071,148.4911,148.0828,147.8994,147.8402,148.3195,148.4911,147.9053,148.1953,149.0888,148.0828,147.8402,147.8462,145.9467,148.9053,146.6568,147.4260,147.9586,148.3195,148.3787,147.9586,148.2012,148.4911,147.8462,147.7811,148.0828,148.8521,147.8994,147.0710,148.8521,148.0828,148.6686,148.8521,148.9112,148.2012,147.8402,148.5562,148.9112,148.7278,148.4379,148.2012,148.7278,148.8521,145.3550,147.9586,147.9645,147.9586,175.2959,148.8462,147.3728,148.9704,148.4911,148.7929,148.9704,148.8521,148.4911,148.6154,147.0710,148.9704,149.0888,148.3787,148.1953,148.9704,148.3787,146.5385,148.0769,148.7337,146.8343,147.0118,148.2604,148.2012,148.3728,148.9704,148.4379,147.1302" +performing set operations between randomized regions - 3d,"difference, small, 
native",100,19,2471900,1419.0868,1416.1311,1429.9637,25.8072,6.5937,59.6605,"1413.1053,1408.8947,1666.2105,1443.1579,1430.0000,1433.1053,1417.8421,1411.5263,1434.1579,1418.9474,1422.0526,1440.5263,1422.5789,1414.6842,1402.0526,1413.6316,1418.8947,1416.2632,1417.3158,1421.0526,1416.2632,1409.9474,1411.0000,1425.2105,1431.5789,1411.5263,1421.5263,1417.3158,1417.3158,1418.8947,1416.7895,1424.1579,1409.4211,1410.4737,1410.4737,1405.1579,1424.2105,1418.3684,1412.0526,1411.0000,1412.0526,1415.2105,1412.0526,1415.2105,1414.6842,1412.5789,1409.4211,1416.7895,1416.7895,1422.5789,1417.8421,1411.0000,1410.4211,1422.0526,1416.7895,1418.3684,1412.5789,1413.6316,1419.4211,1418.9474,1411.0000,1412.5263,1413.0526,1407.7895,1402.0526,1418.8947,1427.3684,1413.6316,1416.7895,1428.8947,1411.5263,1408.3684,1417.8421,1414.6842,1418.4211,1426.2632,1419.4211,1415.2105,1413.1053,1409.9474,1418.8947,1419.4211,1417.3158,1407.8421,1416.7895,1418.8947,1409.4211,1414.6842,1412.5789,1418.8947,1417.3158,1407.8421,1414.6842,1416.2632,1418.8947,1415.7895,1404.6316,1417.8421,1418.4211,1418.3684" +performing set operations between randomized regions - 3d,"union, medium, 
native",100,2,4479400,22534.1150,22464.8450,22767.9600,578.2640,191.8437,1295.9856,"22426.5000,22462.0000,27857.0000,23333.5000,22902.5000,22677.0000,22627.0000,22777.0000,22762.0000,22887.5000,22612.0000,22732.0000,22662.0000,22437.0000,22742.0000,22622.0000,22582.0000,22391.5000,22426.5000,22541.5000,22406.5000,22371.5000,22487.0000,22391.5000,22486.5000,22481.5000,22286.5000,22336.5000,22311.0000,22507.0000,22396.5000,22672.0000,22336.5000,22386.5000,22321.0000,22391.5000,22396.5000,22376.5000,22567.0000,22371.5000,22376.5000,22692.0000,22562.0000,22381.5000,22381.5000,22336.0000,22266.5000,22552.0000,22276.0000,22346.5000,22261.0000,22346.5000,22361.5000,22396.5000,22281.5000,22341.5000,22341.0000,22381.5000,22381.5000,22376.5000,22441.5000,22601.5000,22301.5000,22426.5000,22542.0000,22556.5000,22301.5000,22612.0000,22396.5000,22341.5000,22306.0000,22316.5000,22326.5000,22376.5000,22331.0000,22417.0000,23899.0000,22306.5000,22256.0000,22381.5000,22351.5000,22306.5000,22491.5000,22511.5000,22356.5000,22421.5000,22562.0000,22311.0000,22557.0000,22602.0000,22556.5000,22512.0000,22702.0000,22707.0000,22431.5000,22446.5000,22386.5000,22537.0000,22341.5000,22501.5000" +performing set operations between randomized regions - 3d,"intersection, medium, 
native",100,10,2581000,2556.7600,2555.4060,2558.4210,7.5893,6.0739,10.3917,"2556.6000,2556.6000,2591.7000,2562.7000,2564.6000,2552.6000,2559.7000,2562.6000,2555.7000,2554.6000,2559.7000,2542.6000,2562.7000,2565.7000,2558.6000,2553.7000,2562.7000,2552.6000,2547.6000,2544.7000,2555.6000,2555.7000,2557.6000,2562.6000,2564.7000,2565.7000,2559.6000,2561.7000,2546.6000,2552.7000,2558.6000,2546.7000,2555.6000,2553.7000,2557.6000,2567.6000,2543.7000,2549.6000,2560.7000,2554.6000,2551.7000,2550.6000,2552.7000,2557.6000,2555.7000,2551.6000,2563.7000,2552.6000,2549.7000,2556.6000,2550.7000,2557.6000,2556.6000,2539.6000,2557.7000,2549.6000,2553.7000,2551.6000,2558.7000,2562.7000,2565.6000,2586.7000,2543.6000,2558.6000,2556.7000,2564.7000,2561.6000,2548.7000,2552.6000,2557.7000,2559.6000,2549.6000,2550.6000,2554.7000,2545.6000,2557.7000,2549.6000,2556.7000,2565.7000,2565.6000,2565.7000,2567.7000,2555.6000,2562.7000,2555.7000,2555.6000,2555.7000,2552.6000,2555.7000,2557.6000,2560.7000,2558.7000,2555.6000,2560.7000,2555.6000,2551.7000,2548.6000,2549.7000,2564.6000,2553.7000" +performing set operations between randomized regions - 3d,"difference, medium, 
native",100,3,3374100,11406.7900,11388.6600,11472.8400,157.0100,43.1027,360.3121,"11381.0000,11374.3333,12903.6667,11511.3333,11487.6667,11481.3333,11431.0000,11434.3333,11340.6667,11341.0000,11377.6667,11407.6667,11344.0000,11397.3333,11421.0000,11364.0000,11481.3333,11411.0000,11477.6667,11351.0000,11397.6667,11384.0000,11411.0000,11414.3333,11434.3333,11421.0000,11441.0000,11517.6667,11441.0000,11404.3333,11451.0000,11381.0000,11421.0000,11367.3333,11414.3333,11384.0000,11381.0000,11381.0000,11474.3333,11337.6667,11337.6667,11364.0000,11397.6667,11377.6667,11427.6667,11367.6667,11414.3333,11371.0000,11397.6667,11357.3333,11448.0000,11397.3333,11384.0000,11444.6667,11314.0000,11451.0000,11357.6667,11357.6667,11344.0000,11371.0000,11387.6667,11391.0000,11347.3333,11340.6667,11314.0000,11374.0000,11451.3333,11360.6667,11371.0000,11284.0000,11367.6667,11367.6667,11471.0000,11377.6667,11381.0000,11371.0000,11350.6667,11447.6667,11354.0000,11367.6667,11334.3333,11364.0000,11387.6667,11381.0000,11347.6667,11367.3333,11364.3333,11431.0000,11347.6667,11444.3333,11371.0000,11394.3333,11357.3333,11414.0000,11350.6667,11387.6667,11414.3333,11290.6667,11391.0000,11401.0000" +performing set operations between randomized regions - 3d,"union, large, 
native",100,1,212501400,2124888.3800,2113187.0500,2134745.7800,54775.4361,46222.2917,62377.0154,"2147368.0000,2148060.0000,2161735.0000,2150955.0000,2155704.0000,2130215.0000,2011582.0000,2017742.0000,2014746.0000,2009397.0000,2015218.0000,2018393.0000,2015207.0000,2020438.0000,2015278.0000,2020929.0000,2017121.0000,2017712.0000,2010859.0000,2015488.0000,2010929.0000,2094477.0000,2167716.0000,2153190.0000,2158329.0000,2148330.0000,2159881.0000,2145133.0000,2155443.0000,2159161.0000,2156926.0000,2141387.0000,2152919.0000,2148821.0000,2160133.0000,2146436.0000,2240334.0000,2151736.0000,2151846.0000,2159571.0000,2144322.0000,2150574.0000,2147969.0000,2160494.0000,2158569.0000,2153440.0000,2158159.0000,2140074.0000,2142810.0000,2152337.0000,2144663.0000,2149523.0000,2152098.0000,2142739.0000,2149953.0000,2147147.0000,2148711.0000,2144192.0000,2150074.0000,2156766.0000,2150554.0000,2155464.0000,2150695.0000,2157127.0000,2162537.0000,2151506.0000,2146807.0000,2159040.0000,2148952.0000,2152898.0000,2146456.0000,2145195.0000,2149181.0000,2150083.0000,2156035.0000,2148750.0000,2158238.0000,2145765.0000,2155122.0000,2150604.0000,2147058.0000,2018013.0000,2022521.0000,2011240.0000,2016611.0000,2071234.0000,2152988.0000,2145103.0000,2146065.0000,2148320.0000,2146847.0000,2143440.0000,2149893.0000,2157698.0000,2151266.0000,2145785.0000,2146557.0000,2159471.0000,2149342.0000,2146918.0000" +performing set operations between randomized regions - 3d,"intersection, large, 
native",100,2,2986800,15050.9650,15031.0150,15112.9800,162.0936,49.2867,351.9210,"15138.0000,15067.5000,15589.0000,15082.5000,15052.5000,15102.5000,15063.0000,15117.5000,15107.5000,14992.5000,15022.5000,14977.5000,15062.5000,15052.5000,15087.5000,15017.5000,15092.5000,15093.0000,14997.5000,14972.0000,15037.5000,15022.5000,14992.5000,15012.5000,15087.5000,15017.5000,14997.5000,15087.5000,14992.5000,14997.5000,15062.5000,15088.0000,15107.5000,15037.5000,15098.0000,15037.5000,15022.5000,15032.5000,14962.5000,15027.5000,15087.5000,15107.5000,15017.5000,15088.0000,14977.5000,15002.5000,14987.0000,14982.0000,15123.0000,15037.5000,15017.5000,14987.5000,15017.5000,14992.5000,15012.5000,15032.5000,14927.5000,15017.5000,15047.5000,14947.5000,15042.5000,15062.5000,15077.5000,14992.5000,14987.5000,15027.5000,14962.5000,15067.5000,15047.5000,14972.5000,15042.5000,14977.5000,14992.5000,15072.5000,14987.5000,15012.5000,14982.5000,15012.5000,14972.5000,15057.5000,14987.5000,15037.5000,14997.5000,15048.0000,15002.0000,14962.0000,14942.5000,14992.5000,15042.5000,14967.5000,15037.5000,15087.5000,15043.0000,15012.5000,15027.5000,15072.5000,15007.5000,15137.5000,15027.5000,16495.5000" +performing set operations between randomized regions - 3d,"difference, large, 
native",100,1,627379000,6214396.7000,6190542.2800,6238259.2700,121809.7846,104017.6352,158682.9094,"6303525.0000,6297555.0000,6453380.0000,6732809.0000,6299969.0000,6272438.0000,6293888.0000,6253962.0000,6181094.0000,6022774.0000,5968741.0000,5981906.0000,5950517.0000,6160324.0000,6264592.0000,6294810.0000,6255054.0000,6268209.0000,6273419.0000,6204068.0000,6001012.0000,5993308.0000,6016142.0000,5984370.0000,6006342.0000,6291844.0000,6301972.0000,6265875.0000,6301852.0000,6269010.0000,6264642.0000,6293778.0000,6261276.0000,6271896.0000,6251738.0000,6260033.0000,6256406.0000,6280493.0000,6264752.0000,6257818.0000,6249484.0000,6259101.0000,6267216.0000,6262969.0000,6255665.0000,6301763.0000,6302915.0000,6264682.0000,6279260.0000,6324706.0000,6266557.0000,6288537.0000,6259903.0000,6203667.0000,6024948.0000,6043222.0000,5977428.0000,6149384.0000,6260644.0000,6251327.0000,6248301.0000,6152569.0000,6009148.0000,6073730.0000,6117564.0000,6358159.0000,6322733.0000,6254242.0000,6256557.0000,6270324.0000,6258781.0000,6267958.0000,6257429.0000,6288808.0000,6027783.0000,6014409.0000,6026521.0000,6015009.0000,6010580.0000,6263440.0000,6265443.0000,6297414.0000,6263100.0000,6260264.0000,6261215.0000,6151899.0000,6141448.0000,6259361.0000,6215990.0000,6018405.0000,6150125.0000,6238342.0000,6246979.0000,6236098.0000,6272798.0000,6262609.0000,6260654.0000,6305779.0000,6262198.0000,6254533.0000" +"normalizing a fully mergeable, complex tiling of boxes - 2d","small, 
native",100,11,2414500,2282.2827,2276.5064,2302.7200,48.1842,7.7311,108.4810,"2272.3636,2275.0000,2442.6364,2293.1818,2285.0909,2273.2727,2282.2727,2286.9091,2271.4545,2276.8182,2286.9091,2289.6364,2279.5455,2281.3636,2274.1818,2287.8182,2277.7273,2272.3636,2293.2727,2283.1818,2275.0909,2287.8182,2275.9091,2283.2727,2280.5455,2266.8182,2264.0909,2281.4545,2286.0000,2271.3636,2279.6364,2279.5455,2283.1818,2280.5455,2277.7273,2273.1818,2284.1818,2278.6364,2266.0000,2281.3636,2281.4545,2279.6364,2278.6364,2276.0000,2262.2727,2269.6364,2278.6364,2283.2727,2273.1818,2280.4545,2280.5455,2270.4545,2281.4545,2276.8182,2265.9091,2278.6364,2275.9091,2271.3636,2275.0000,2268.6364,2265.0000,2726.8182,2289.6364,2276.8182,2266.0000,2273.1818,2281.4545,2267.7273,2278.7273,2276.8182,2271.4545,2284.1818,2276.8182,2274.1818,2276.8182,2281.4545,2270.4545,2280.5455,2271.4545,2277.7273,2270.5455,2271.3636,2256.8182,2260.4545,2273.2727,2272.2727,2271.4545,2274.0909,2275.0909,2264.0909,2271.4545,2281.3636,2276.9091,2279.6364,2260.4545,2273.1818,2268.7273,2278.6364,2266.0000,2265.9091" +"normalizing a fully mergeable, complex tiling of boxes - 2d","small, embedded in 
3d",100,9,2388600,2652.4767,2644.0722,2673.5144,62.8089,32.0142,124.0691,"2660.3333,2640.3333,2903.0000,2664.8889,2665.8889,2677.1111,2651.4444,2658.2222,2657.0000,2666.0000,2659.2222,2642.6667,2645.8889,2650.3333,2652.5556,2647.0000,3153.5556,2639.2222,2645.8889,2644.7778,2640.3333,2648.2222,2663.6667,2640.3333,2629.2222,2650.4444,2642.5556,2650.3333,2645.8889,2633.6667,2650.3333,2641.4444,2651.4444,2647.1111,2650.3333,2635.8889,2642.5556,2654.8889,2644.7778,2641.4444,2648.1111,2637.0000,2635.8889,2639.2222,2644.7778,2639.2222,2640.3333,2640.3333,2641.4444,2816.2222,2634.7778,2629.2222,2613.5556,2633.6667,2627.0000,2634.7778,2631.4444,2635.8889,2626.8889,2633.6667,2639.3333,2622.4444,2627.0000,2612.5556,2632.5556,2624.7778,2632.5556,2633.6667,2624.7778,2634.7778,2631.4444,2633.6667,2635.8889,2627.0000,2631.4444,2635.8889,2627.0000,2638.1111,2642.5556,2624.7778,2638.1111,2633.6667,2628.1111,2646.0000,2639.2222,2620.3333,2638.1111,2635.8889,2631.4444,2643.6667,2820.6667,2639.2222,2644.8889,2647.0000,2660.3333,2662.6667,2637.0000,2642.5556,2653.6667,2659.2222" +"normalizing a fully mergeable, complex tiling of boxes - 2d","large, 
native",100,1,151765700,1507627.1300,1501617.2700,1513045.8100,28957.7073,26567.2619,30700.3510,"1532222.0000,1528675.0000,1531521.0000,1528155.0000,1532353.0000,1528345.0000,1529016.0000,1533104.0000,1528074.0000,1531691.0000,1529256.0000,1528525.0000,1532943.0000,1527864.0000,1534757.0000,1528095.0000,1527975.0000,1530429.0000,1528475.0000,1529126.0000,1507275.0000,1469213.0000,1474924.0000,1471457.0000,1469243.0000,1473160.0000,1467921.0000,1467609.0000,1507085.0000,1522454.0000,1532574.0000,1528094.0000,1527754.0000,1533605.0000,1529266.0000,1528054.0000,1533164.0000,1529347.0000,1527944.0000,1519819.0000,1519849.0000,1521722.0000,1521523.0000,1529196.0000,1532172.0000,1527884.0000,1532242.0000,1526782.0000,1528155.0000,1533053.0000,1527954.0000,1533264.0000,1527383.0000,1529738.0000,1534516.0000,1528755.0000,1528635.0000,1484482.0000,1466909.0000,1471367.0000,1467409.0000,1467089.0000,1495312.0000,1529046.0000,1527604.0000,1533595.0000,1527183.0000,1531080.0000,1528896.0000,1529206.0000,1532253.0000,1527283.0000,1531341.0000,1495423.0000,1467349.0000,1471136.0000,1469704.0000,1469223.0000,1471637.0000,1465937.0000,1465866.0000,1465946.0000,1460607.0000,1465495.0000,1458242.0000,1458773.0000,1464995.0000,1462871.0000,1462841.0000,1464915.0000,1463001.0000,1461748.0000,1468211.0000,1462971.0000,1475645.0000,1521973.0000,1528776.0000,1529858.0000,1526822.0000,1529307.0000" +"normalizing a fully mergeable, complex tiling of boxes - 2d","large, embedded in 
3d",100,1,172734200,1712567.6400,1705390.7300,1718177.6100,32238.6265,26677.7791,37108.6450,"1726430.0000,1728724.0000,1753561.0000,1731970.0000,1727291.0000,1727702.0000,1728505.0000,1728855.0000,1731410.0000,1726020.0000,1728785.0000,1700982.0000,1643343.0000,1646419.0000,1646178.0000,1650526.0000,1649674.0000,1644475.0000,1653472.0000,1636660.0000,1638133.0000,1634486.0000,1638974.0000,1640888.0000,1644785.0000,1649455.0000,1644655.0000,1651368.0000,1644195.0000,1644966.0000,1737391.0000,1724988.0000,1729726.0000,1725298.0000,1726731.0000,1723064.0000,1726982.0000,1732412.0000,1726751.0000,1728935.0000,1723655.0000,1729256.0000,1725448.0000,1725419.0000,1730668.0000,1727332.0000,1728885.0000,1726641.0000,1728514.0000,1725940.0000,1724707.0000,1729747.0000,1724897.0000,1727693.0000,1726180.0000,1727552.0000,1722893.0000,1727122.0000,1727873.0000,1724857.0000,1728124.0000,1728073.0000,1734666.0000,1725198.0000,1729335.0000,1725249.0000,1723675.0000,1730959.0000,1722443.0000,1730358.0000,1727803.0000,1730108.0000,1724937.0000,1726260.0000,1725088.0000,1727302.0000,1726841.0000,1724968.0000,1733955.0000,1728344.0000,1723666.0000,1733343.0000,1728324.0000,1728204.0000,1725098.0000,1727111.0000,1724887.0000,1725539.0000,1725809.0000,1729276.0000,1727312.0000,1724436.0000,1728524.0000,1724217.0000,1724457.0000,1732722.0000,1725098.0000,1730097.0000,1724687.0000,1729827.0000" +benchmark independent task pattern with N tasks - 100,task 
generation,100,1,947134200,12193683.3100,11853505.4700,12441442.2700,1470621.6512,1083416.8554,1974319.6466,"12082318.0000,11424662.0000,13057297.0000,12141471.0000,12108108.0000,11881668.0000,12565605.0000,12742049.0000,6192687.0000,10502895.0000,13351876.0000,12194101.0000,11934038.0000,12193901.0000,12973198.0000,14048376.0000,12962727.0000,11487131.0000,11614392.0000,11826514.0000,6254934.0000,7696156.0000,12920868.0000,13064912.0000,12630167.0000,12584481.0000,14148595.0000,14304260.0000,14206295.0000,12833774.0000,12164915.0000,12223497.0000,12194701.0000,12596815.0000,12278862.0000,14655977.0000,7762031.0000,6280742.0000,14905029.0000,13639220.0000,12803887.0000,13122891.0000,12833522.0000,12657520.0000,12920367.0000,12339668.0000,12455796.0000,12015231.0000,12437402.0000,12953620.0000,12891463.0000,12775412.0000,12914477.0000,12742932.0000,12272599.0000,12743713.0000,12906090.0000,13608872.0000,11454068.0000,11265912.0000,12099622.0000,12194401.0000,12045250.0000,12264073.0000,12940186.0000,13187103.0000,13357045.0000,11942294.0000,12075376.0000,12019761.0000,12049797.0000,11991257.0000,11933728.0000,12251950.0000,11962472.0000,12008929.0000,12083041.0000,12049607.0000,12136332.0000,12049276.0000,12077901.0000,12021074.0000,11962341.0000,12107247.0000,12020461.0000,11991327.0000,12136661.0000,12049466.0000,12891693.0000,11871900.0000,11549049.0000,10870662.0000,10929334.0000,12175335.0000,11739399.0000,11483404.0000,13135435.0000,14532133.0000,14287719.0000,12147603.0000" +benchmark independent task pattern with N tasks - 1000,task 
generation,100,1,8633993500,96791314.0600,93699135.8800,99976194.2700,16023733.1992,14422768.6961,17882004.1577,"74190980.0000,110506032.0000,93062691.0000,102178258.0000,97773389.0000,92072212.0000,90557922.0000,114886696.0000,85754788.0000,114824309.0000,124247798.0000,82227843.0000,89024817.0000,86703437.0000,118387942.0000,92885043.0000,113197303.0000,84330039.0000,83342756.0000,117335437.0000,96925732.0000,99470135.0000,89563728.0000,78675510.0000,98699365.0000,103877679.0000,81245642.0000,129804451.0000,121474783.0000,79361602.0000,95047883.0000,116624680.0000,109499014.0000,86803006.0000,90381117.0000,67827339.0000,118824580.0000,104450605.0000,115508174.0000,91772435.0000,116896956.0000,88599660.0000,126677775.0000,109665599.0000,69165866.0000,126100910.0000,83511958.0000,89769027.0000,80542900.0000,91030058.0000,97532452.0000,75382739.0000,86047194.0000,85270831.0000,84710268.0000,98093766.0000,74823530.0000,83223180.0000,83674456.0000,105767300.0000,68672460.0000,82981482.0000,81280919.0000,113189829.0000,123321944.0000,97626671.0000,87471252.0000,95218536.0000,81345921.0000,88671587.0000,84595211.0000,70619531.0000,77119510.0000,65959148.0000,68059599.0000,74971570.0000,126971632.0000,107198834.0000,110311604.0000,91573057.0000,108589108.0000,115314606.0000,119428974.0000,90412527.0000,109004496.0000,128634092.0000,100460411.0000,86499641.0000,97640467.0000,97755515.0000,100200299.0000,121130781.0000,85467414.0000,100177766.0000,98892430.0000,101977878.0000,103016117.0000,100685959.0000,99579712.0000,117289339.0000" +benchmark independent task pattern with N tasks - 5000,task 
generation,100,1,53919014600,512098930.3300,501628461.3900,522535167.5700,53032970.8969,47080667.2921,61219675.3989,"549278715.0000,578989878.0000,530702316.0000,554813356.0000,523145973.0000,507357808.0000,557186664.0000,512898951.0000,477550832.0000,489821159.0000,593508668.0000,551868864.0000,543649124.0000,508930979.0000,529085661.0000,624851004.0000,596629442.0000,544876450.0000,593756978.0000,585512922.0000,605154410.0000,600323344.0000,606031652.0000,565697553.0000,622150505.0000,566912065.0000,579365269.0000,623711212.0000,535371583.0000,520187062.0000,480276575.0000,484959992.0000,514299012.0000,498377143.0000,522210076.0000,537898030.0000,526325376.0000,473857338.0000,473777626.0000,455375407.0000,489126609.0000,491114908.0000,528046097.0000,462634816.0000,495627471.0000,486173591.0000,512047732.0000,473659722.0000,480997109.0000,411033310.0000,490192759.0000,490474573.0000,484442038.0000,444110614.0000,494932652.0000,465150092.0000,505768741.0000,446372290.0000,469520076.0000,470539177.0000,451925726.0000,482955020.0000,495996097.0000,463465298.0000,454504263.0000,513464357.0000,461695062.0000,544237232.0000,441030394.0000,358568136.0000,424481518.0000,470588259.0000,412710156.0000,410985448.0000,472473441.0000,455850966.0000,432513611.0000,438808012.0000,472071089.0000,462300068.0000,508643838.0000,553116942.0000,548122245.0000,551026589.0000,577916687.0000,518102755.0000,497830872.0000,515990923.0000,538492402.0000,521696607.0000,565981834.0000,549449911.0000,505241347.0000,524489329.0000,553285299.0000,478027266.0000,541416032.0000,524125079.0000,601013853.0000,576657719.0000" diff --git a/ci/perf/gpuc2_bench.md b/ci/perf/gpuc2_bench.md index 69cfd4aa7..93e0ba01e 100644 --- a/ci/perf/gpuc2_bench.md +++ b/ci/perf/gpuc2_bench.md @@ -2,99 +2,154 @@ | Metadata | | | :------- | :------------------- | -| Created | 2023-09-01T11:16:24Z | +| Created | 2023-09-13T22:20:03Z | | Test case | Benchmark name | Min | Mean | Std dev | | 
:------------------------------------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------- | -------------: | -------------: | ------------: | -| benchmark intrusive graph dependency handling with N nodes - 1 | creating nodes | 4.47 | 4.56 | 0.81 | -| benchmark intrusive graph dependency handling with N nodes - 1 | creating and adding dependencies | 22.64 | 22.90 | 0.09 | -| benchmark intrusive graph dependency handling with N nodes - 1 | adding and removing dependencies | 15.45 | 15.76 | 0.21 | -| benchmark intrusive graph dependency handling with N nodes - 1 | checking for dependencies | 1.69 | 1.70 | 0.00 | -| benchmark intrusive graph dependency handling with N nodes - 10 | creating nodes | 38.87 | 39.42 | 0.39 | -| benchmark intrusive graph dependency handling with N nodes - 10 | creating and adding dependencies | 243.51 | 244.50 | 0.39 | -| benchmark intrusive graph dependency handling with N nodes - 10 | adding and removing dependencies | 200.08 | 201.92 | 4.74 | -| benchmark intrusive graph dependency handling with N nodes - 10 | checking for dependencies | 25.35 | 26.64 | 0.85 | -| benchmark intrusive graph dependency handling with N nodes - 100 | creating nodes | 394.66 | 399.34 | 3.04 | -| benchmark intrusive graph dependency handling with N nodes - 100 | creating and adding dependencies | 4'483.17 | 4'518.20 | 64.28 | -| benchmark intrusive graph dependency handling with N nodes - 100 | adding and removing dependencies | 4'836.80 | 4'873.35 | 16.87 | -| benchmark intrusive graph dependency handling with N nodes - 100 | checking for dependencies | 1'766.07 | 1'774.30 | 3.66 | -| benchmark task handling > without access thread | generating and deleting tasks | 2'950'958.00 | 3'515'069.04 | 256'538.25 | -| benchmark task handling > with access thread | generating and deleting tasks with access thread | 7'323'502.00 | 7'985'335.82 | 182'135.77 | -| 
generating large task graphs | soup topology | 1'308'136.00 | 1'319'213.27 | 17'701.28 | -| generating large task graphs | chain topology | 42'027.00 | 42'958.74 | 5'733.82 | -| generating large task graphs | expanding tree topology | 53'950.00 | 57'518.55 | 7'157.07 | -| generating large task graphs | contracting tree topology | 99'757.00 | 101'296.31 | 6'214.31 | -| generating large task graphs | wave\_sim topology | 342'517.00 | 400'732.25 | 25'301.74 | -| generating large task graphs | jacobi topology | 135'455.00 | 136'948.99 | 6'074.17 | -| generating large command graphs for N nodes - 1 | soup topology | 1'652'990.00 | 1'958'748.15 | 176'232.32 | -| generating large command graphs for N nodes - 1 | chain topology | 137'168.00 | 150'596.67 | 22'118.89 | -| generating large command graphs for N nodes - 1 | expanding tree topology | 186'361.00 | 190'034.21 | 8'968.37 | -| generating large command graphs for N nodes - 1 | contracting tree topology | 230'344.00 | 233'637.44 | 7'664.21 | -| generating large command graphs for N nodes - 1 | wave\_sim topology | 1'152'171.00 | 1'165'126.07 | 18'601.10 | -| generating large command graphs for N nodes - 1 | jacobi topology | 400'857.00 | 406'755.55 | 9'192.95 | -| generating large command graphs for N nodes - 4 | soup topology | 2'017'290.00 | 2'419'506.57 | 182'995.03 | -| generating large command graphs for N nodes - 4 | chain topology | 370'681.00 | 378'250.50 | 20'656.41 | -| generating large command graphs for N nodes - 4 | expanding tree topology | 459'929.00 | 465'948.52 | 13'069.16 | -| generating large command graphs for N nodes - 4 | contracting tree topology | 503'111.00 | 508'960.84 | 11'660.60 | -| generating large command graphs for N nodes - 4 | wave\_sim topology | 2'020'666.00 | 2'328'314.10 | 143'627.36 | -| generating large command graphs for N nodes - 4 | jacobi topology | 776'209.00 | 832'185.33 | 60'791.52 | -| generating large command graphs for N nodes - 16 | soup topology | 2'574'456.00 | 
2'978'129.12 | 248'980.49 | -| generating large command graphs for N nodes - 16 | chain topology | 1'091'606.00 | 1'220'057.98 | 80'687.75 | -| generating large command graphs for N nodes - 16 | expanding tree topology | 1'151'801.00 | 1'166'405.72 | 9'095.46 | -| generating large command graphs for N nodes - 16 | contracting tree topology | 1'042'203.00 | 1'158'620.22 | 99'810.73 | -| generating large command graphs for N nodes - 16 | wave\_sim topology | 3'848'349.00 | 4'438'359.03 | 321'458.21 | -| generating large command graphs for N nodes - 16 | jacobi topology | 2'196'429.00 | 2'514'141.47 | 172'131.45 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | soup topology | 1'660'504.00 | 2'027'151.53 | 151'484.49 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | chain topology | 117'531.00 | 118'540.33 | 1'295.01 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | expanding tree topology | 162'356.00 | 178'499.42 | 14'018.41 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | contracting tree topology | 234'873.00 | 237'058.49 | 1'943.95 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | wave\_sim topology | 981'488.00 | 1'153'820.84 | 62'733.53 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | jacobi topology | 350'873.00 | 411'122.76 | 15'479.94 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | soup topology | 1'459'674.00 | 1'905'307.86 | 89'596.97 | -| building command graphs in a dedicated 
scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | chain topology | 290'699.00 | 359'442.08 | 35'781.45 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | expanding tree topology | 327'890.00 | 404'205.15 | 47'601.63 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | contracting tree topology | 283'245.00 | 345'736.94 | 45'127.53 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | wave\_sim topology | 1'128'145.00 | 1'249'644.73 | 75'007.38 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | jacobi topology | 483'925.00 | 554'800.07 | 46'903.92 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'662'061.00 | 2'994'286.44 | 180'038.86 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 409'774.00 | 419'183.21 | 14'442.18 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 464'919.00 | 475'606.00 | 13'333.58 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 497'991.00 | 524'375.57 | 19'386.36 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 3'017'835.00 | 3'187'415.31 | 70'663.71 | -| building command graphs in a dedicated scheduler thread for N 
nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 914'390.00 | 918'443.70 | 5'688.71 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'531'675.00 | 2'881'294.94 | 214'528.59 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | chain topology | 518'961.00 | 544'596.83 | 13'549.62 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 515'415.00 | 620'310.38 | 38'647.89 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 562'033.00 | 652'830.19 | 31'311.72 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 3'020'652.00 | 3'257'041.44 | 125'546.19 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 862'101.00 | 986'714.78 | 63'472.52 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | soup topology | 2'034'643.00 | 2'408'020.52 | 202'784.53 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | chain topology | 378'526.00 | 382'178.07 | 2'549.93 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | expanding tree topology | 405'877.00 | 455'874.09 | 26'691.70 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > 
reference: single-threaded immediate graph generation | contracting tree topology | 437'417.00 | 495'856.33 | 32'954.81 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | wave\_sim topology | 2'029'714.00 | 2'351'543.24 | 132'285.51 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | jacobi topology | 786'579.00 | 879'728.78 | 63'835.77 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | soup topology | 1'433'714.00 | 1'799'572.23 | 224'525.51 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | chain topology | 547'064.00 | 601'158.81 | 106'606.93 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | expanding tree topology | 638'408.00 | 707'736.01 | 23'495.63 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | contracting tree topology | 685'446.00 | 753'270.58 | 105'691.32 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | wave\_sim topology | 2'027'860.00 | 2'312'390.37 | 114'343.35 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | jacobi topology | 986'667.00 | 1'089'893.93 | 149'434.22 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 3'029'277.00 | 3'306'354.00 | 229'040.32 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 619'682.00 | 
670'709.92 | 23'487.32 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 707'158.00 | 753'557.81 | 29'506.39 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 738'928.00 | 812'828.43 | 22'734.87 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 4'070'029.00 | 4'345'765.24 | 167'609.05 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 1'288'429.00 | 1'406'663.13 | 57'110.53 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'488'292.00 | 2'858'993.23 | 106'696.44 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | chain topology | 446'113.00 | 593'733.49 | 118'918.25 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 635'422.00 | 724'724.01 | 115'394.95 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 670'448.00 | 793'321.23 | 131'366.87 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 2'943'255.00 | 3'216'111.16 | 62'676.67 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a 
scheduler thread at 10 us per task | jacobi topology | 993'350.00 | 1'105'717.07 | 153'380.19 | -| benchmark independent task pattern with N tasks - 100 | task generation | 4'077'182.00 | 8'159'441.33 | 2'611'192.01 | -| benchmark independent task pattern with N tasks - 1000 | task generation | 63'383'443.00 | 98'678'958.86 | 16'792'148.27 | -| benchmark independent task pattern with N tasks - 5000 | task generation | 332'806'307.00 | 495'209'589.07 | 53'087'816.48 | +| benchmark intrusive graph dependency handling with N nodes - 1 | creating nodes | 4.47 | 4.48 | 0.01 | +| benchmark intrusive graph dependency handling with N nodes - 1 | creating and adding dependencies | 21.30 | 22.37 | 3.91 | +| benchmark intrusive graph dependency handling with N nodes - 1 | adding and removing dependencies | 15.45 | 15.50 | 0.19 | +| benchmark intrusive graph dependency handling with N nodes - 1 | checking for dependencies | 1.37 | 1.40 | 0.25 | +| benchmark intrusive graph dependency handling with N nodes - 10 | creating nodes | 38.87 | 39.89 | 6.77 | +| benchmark intrusive graph dependency handling with N nodes - 10 | creating and adding dependencies | 236.53 | 238.29 | 3.49 | +| benchmark intrusive graph dependency handling with N nodes - 10 | adding and removing dependencies | 219.04 | 222.47 | 3.71 | +| benchmark intrusive graph dependency handling with N nodes - 10 | checking for dependencies | 23.52 | 23.73 | 0.39 | +| benchmark intrusive graph dependency handling with N nodes - 100 | creating nodes | 390.05 | 405.53 | 86.19 | +| benchmark intrusive graph dependency handling with N nodes - 100 | creating and adding dependencies | 3'927.17 | 4'128.78 | 80.35 | +| benchmark intrusive graph dependency handling with N nodes - 100 | adding and removing dependencies | 4'668.50 | 4'722.76 | 58.53 | +| benchmark intrusive graph dependency handling with N nodes - 100 | checking for dependencies | 1'648.33 | 1'705.35 | 26.76 | +| benchmark task handling > without access thread | 
generating and deleting tasks | 2'995'025.00 | 3'424'395.69 | 281'411.97 | +| benchmark task handling > with access thread | generating and deleting tasks with access thread | 7'245'151.00 | 7'845'350.37 | 271'314.88 | +| generating large task graphs | soup topology | 842'345.00 | 846'146.14 | 2'384.21 | +| generating large task graphs | chain topology | 35'165.00 | 35'539.63 | 544.48 | +| generating large task graphs | expanding tree topology | 55'774.00 | 56'693.66 | 1'574.18 | +| generating large task graphs | contracting tree topology | 78'316.00 | 79'067.31 | 951.42 | +| generating large task graphs | wave\_sim topology | 284'167.00 | 286'648.25 | 6'443.57 | +| generating large task graphs | jacobi topology | 102'122.00 | 102'852.12 | 950.72 | +| generating large command graphs for N nodes - 1 | soup topology | 1'568'912.00 | 1'576'571.76 | 4'371.09 | +| generating large command graphs for N nodes - 1 | chain topology | 106'460.00 | 110'380.01 | 7'549.18 | +| generating large command graphs for N nodes - 1 | expanding tree topology | 160'252.00 | 162'664.59 | 3'103.87 | +| generating large command graphs for N nodes - 1 | contracting tree topology | 182'343.00 | 200'797.15 | 13'819.32 | +| generating large command graphs for N nodes - 1 | wave\_sim topology | 924'010.00 | 1'046'738.34 | 70'294.77 | +| generating large command graphs for N nodes - 1 | jacobi topology | 348'028.00 | 351'385.19 | 6'675.62 | +| generating large command graphs for N nodes - 4 | soup topology | 1'673'891.00 | 1'922'108.22 | 130'770.15 | +| generating large command graphs for N nodes - 4 | chain topology | 328'541.00 | 358'519.14 | 24'324.94 | +| generating large command graphs for N nodes - 4 | expanding tree topology | 453'568.00 | 459'281.24 | 13'581.67 | +| generating large command graphs for N nodes - 4 | contracting tree topology | 486'461.00 | 490'426.63 | 3'034.51 | +| generating large command graphs for N nodes - 4 | wave\_sim topology | 1'936'909.00 | 2'210'197.98 | 
127'731.56 | +| generating large command graphs for N nodes - 4 | jacobi topology | 821'455.00 | 825'648.05 | 3'132.03 | +| generating large command graphs for N nodes - 16 | soup topology | 2'176'814.00 | 2'520'158.20 | 199'963.70 | +| generating large command graphs for N nodes - 16 | chain topology | 1'076'578.00 | 1'230'936.94 | 48'272.08 | +| generating large command graphs for N nodes - 16 | expanding tree topology | 954'317.00 | 1'090'513.93 | 70'598.19 | +| generating large command graphs for N nodes - 16 | contracting tree topology | 1'024'981.00 | 1'205'571.59 | 45'443.57 | +| generating large command graphs for N nodes - 16 | wave\_sim topology | 3'523'547.00 | 4'149'140.30 | 306'017.93 | +| generating large command graphs for N nodes - 16 | jacobi topology | 2'015'138.00 | 2'322'835.48 | 161'050.67 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | soup topology | 1'337'884.00 | 1'600'797.17 | 81'568.57 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | chain topology | 108'073.00 | 119'607.87 | 10'040.26 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | expanding tree topology | 184'107.00 | 187'228.94 | 8'079.39 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | contracting tree topology | 213'382.00 | 215'863.90 | 1'647.01 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | wave\_sim topology | 1'080'446.00 | 1'091'880.72 | 5'120.88 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | jacobi topology | 354'219.00 | 356'168.73 | 1'737.65 | +| building command 
graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | soup topology | 1'023'959.00 | 1'244'412.08 | 148'228.25 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | chain topology | 236'176.00 | 297'619.37 | 43'480.44 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | expanding tree topology | 281'281.00 | 383'519.43 | 38'540.43 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | contracting tree topology | 344'691.00 | 426'523.86 | 38'940.30 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | wave\_sim topology | 947'444.00 | 1'123'712.84 | 156'168.34 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | jacobi topology | 473'937.00 | 525'035.97 | 75'981.37 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'622'759.00 | 2'643'829.64 | 21'804.80 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 420'074.00 | 423'334.28 | 10'691.60 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 488'183.00 | 493'179.09 | 11'358.64 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 517'279.00 | 521'654.42 | 10'967.43 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > 
reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 3'128'929.00 | 3'149'747.39 | 22'516.04 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 856'471.00 | 864'048.38 | 15'214.53 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'079'990.00 | 2'270'284.24 | 103'417.84 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | chain topology | 417'790.00 | 493'717.02 | 22'202.11 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 518'340.00 | 558'604.23 | 26'094.34 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 481'240.00 | 547'150.17 | 41'470.70 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 2'783'384.00 | 3'017'446.20 | 179'435.41 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 798'912.00 | 893'464.74 | 38'529.43 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | soup topology | 1'706'382.00 | 2'036'080.12 | 80'917.53 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | chain topology | 333'140.00 | 377'710.08 | 20'580.44 | +| building command graphs in a dedicated scheduler thread for N nodes - 
4 > reference: single-threaded immediate graph generation | expanding tree topology | 402'421.00 | 435'948.47 | 27'895.65 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | contracting tree topology | 493'684.00 | 498'230.49 | 6'140.59 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | wave\_sim topology | 1'935'096.00 | 2'233'613.70 | 124'485.60 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | jacobi topology | 709'613.00 | 789'390.85 | 62'343.45 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | soup topology | 1'269'184.00 | 1'410'411.03 | 147'296.13 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | chain topology | 480'418.00 | 584'265.31 | 100'372.51 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | expanding tree topology | 628'109.00 | 731'552.29 | 99'750.59 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | contracting tree topology | 640'822.00 | 750'304.55 | 116'610.09 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | wave\_sim topology | 1'921'931.00 | 2'200'987.60 | 308'575.49 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | jacobi topology | 845'140.00 | 986'959.93 | 127'677.38 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'707'059.00 | 
3'006'296.04 | 124'500.35 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 678'003.00 | 680'884.60 | 2'425.30 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 705'586.00 | 755'875.16 | 34'583.80 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 730'022.00 | 783'766.63 | 30'291.52 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 3'976'145.00 | 4'266'970.31 | 141'450.87 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 1'214'650.00 | 1'305'467.72 | 56'317.33 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'089'278.00 | 2'348'000.44 | 101'285.31 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | chain topology | 491'149.00 | 580'949.21 | 89'268.04 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 553'757.00 | 699'407.36 | 138'578.73 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 697'890.00 | 764'423.47 | 61'133.08 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a 
scheduler thread at 10 us per task | wave\_sim topology | 2'762'885.00 | 3'090'336.68 | 249'339.41 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 935'321.00 | 1'024'683.71 | 95'665.10 | +| normalizing randomized box sets - 2d | small, native | 576.86 | 582.50 | 12.98 | +| normalizing randomized box sets - 2d | small, embedded in 3d | 684.97 | 693.52 | 3.99 | +| normalizing randomized box sets - 2d | medium, native | 5'575.00 | 5'644.78 | 176.74 | +| normalizing randomized box sets - 2d | medium, embedded in 3d | 6'807.50 | 6'879.02 | 190.13 | +| normalizing randomized box sets - 2d | large, native | 194'236.00 | 198'399.10 | 4'248.34 | +| normalizing randomized box sets - 2d | large, embedded in 3d | 212'641.00 | 213'858.22 | 1'146.76 | +| normalizing randomized box sets - 3d | small - native | 2'494.50 | 2'522.03 | 57.51 | +| normalizing randomized box sets - 3d | medium - native | 9'487.33 | 9'601.41 | 130.64 | +| normalizing randomized box sets - 3d | large - native | 2'045'595.00 | 2'181'704.29 | 42'323.31 | +| normalizing a fully mergeable tiling of boxes - 1 | small, native | 28.16 | 28.37 | 0.20 | +| normalizing a fully mergeable tiling of boxes - 1 | small, embedded in 3d | 49.84 | 50.58 | 0.65 | +| normalizing a fully mergeable tiling of boxes - 1 | medium, native | 298.76 | 303.73 | 4.37 | +| normalizing a fully mergeable tiling of boxes - 1 | medium, embedded in 3d | 426.58 | 427.92 | 1.55 | +| normalizing a fully mergeable tiling of boxes - 1 | large, native | 7'451.25 | 7'542.91 | 39.78 | +| normalizing a fully mergeable tiling of boxes - 1 | large, embedded in 3d | 13'014.00 | 13'103.66 | 160.86 | +| normalizing a fully mergeable tiling of boxes - 2 | small, native | 95.48 | 95.81 | 0.36 | +| normalizing a fully mergeable tiling of boxes - 2 | small, embedded in 3d | 122.76 | 124.87 | 1.67 | +| normalizing a fully mergeable tiling of boxes 
- 2 | medium, native | 812.18 | 897.33 | 37.78 | +| normalizing a fully mergeable tiling of boxes - 2 | medium, embedded in 3d | 1'015.58 | 1'027.95 | 18.18 | +| normalizing a fully mergeable tiling of boxes - 2 | large, native | 36'808.00 | 37'302.12 | 604.58 | +| normalizing a fully mergeable tiling of boxes - 2 | large, embedded in 3d | 37'429.00 | 37'847.07 | 683.73 | +| normalizing a fully mergeable tiling of boxes - 3 | small, native | 216.15 | 217.20 | 5.16 | +| normalizing a fully mergeable tiling of boxes - 3 | medium, native | 1'464.41 | 1'477.86 | 29.79 | +| normalizing a fully mergeable tiling of boxes - 3 | large, native | 44'191.00 | 45'351.96 | 1'842.96 | +| performing set operations between randomized regions - 2d | union, small, native | 896.07 | 908.91 | 15.53 | +| performing set operations between randomized regions - 2d | union, small, embedded in 3d | 1'035.62 | 1'043.33 | 7.72 | +| performing set operations between randomized regions - 2d | intersection, small, native | 237.84 | 240.37 | 2.62 | +| performing set operations between randomized regions - 2d | intersection, small, embedded in 3d | 247.26 | 248.54 | 4.19 | +| performing set operations between randomized regions - 2d | difference, small, native | 946.71 | 957.04 | 22.25 | +| performing set operations between randomized regions - 2d | difference, small, embedded in 3d | 1'236.10 | 1'260.07 | 39.85 | +| performing set operations between randomized regions - 2d | union, medium, native | 13'309.50 | 13'520.11 | 289.40 | +| performing set operations between randomized regions - 2d | union, medium, embedded in 3d | 13'274.00 | 13'414.25 | 221.79 | +| performing set operations between randomized regions - 2d | intersection, medium, native | 2'248.64 | 2'269.85 | 38.06 | +| performing set operations between randomized regions - 2d | intersection, medium, embedded in 3d | 2'262.27 | 2'275.09 | 6.07 | +| performing set operations between randomized regions - 2d | difference, medium, native | 
7'617.00 | 7'684.27 | 184.67 | +| performing set operations between randomized regions - 2d | difference, medium, embedded in 3d | 8'655.67 | 8'776.57 | 99.63 | +| performing set operations between randomized regions - 2d | union, large, native | 152'306.00 | 158'790.42 | 3'202.13 | +| performing set operations between randomized regions - 2d | union, large, embedded in 3d | 170'742.00 | 171'597.90 | 1'027.79 | +| performing set operations between randomized regions - 2d | intersection, large, native | 18'619.00 | 18'786.43 | 276.17 | +| performing set operations between randomized regions - 2d | intersection, large, embedded in 3d | 21'044.00 | 21'257.08 | 201.49 | +| performing set operations between randomized regions - 2d | difference, large, native | 619'232.00 | 637'186.54 | 6'731.01 | +| performing set operations between randomized regions - 2d | difference, large, embedded in 3d | 655'330.00 | 671'807.46 | 9'628.90 | +| performing set operations between randomized regions - 3d | union, small, native | 3'854.14 | 3'955.04 | 691.17 | +| performing set operations between randomized regions - 3d | intersection, small, native | 145.36 | 148.53 | 2.79 | +| performing set operations between randomized regions - 3d | difference, small, native | 1'402.05 | 1'419.09 | 25.81 | +| performing set operations between randomized regions - 3d | union, medium, native | 22'256.00 | 22'534.12 | 578.26 | +| performing set operations between randomized regions - 3d | intersection, medium, native | 2'539.60 | 2'556.76 | 7.59 | +| performing set operations between randomized regions - 3d | difference, medium, native | 11'284.00 | 11'406.79 | 157.01 | +| performing set operations between randomized regions - 3d | union, large, native | 2'009'397.00 | 2'124'888.38 | 54'775.44 | +| performing set operations between randomized regions - 3d | intersection, large, native | 14'927.50 | 15'050.97 | 162.09 | +| performing set operations between randomized regions - 3d | difference, large, 
native | 5'950'517.00 | 6'214'396.70 | 121'809.78 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | small, native | 2'256.82 | 2'282.28 | 48.18 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | small, embedded in 3d | 2'612.56 | 2'652.48 | 62.81 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | large, native | 1'458'242.00 | 1'507'627.13 | 28'957.71 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | large, embedded in 3d | 1'634'486.00 | 1'712'567.64 | 32'238.63 | +| benchmark independent task pattern with N tasks - 100 | task generation | 6'192'687.00 | 12'193'683.31 | 1'470'621.65 | +| benchmark independent task pattern with N tasks - 1000 | task generation | 65'959'148.00 | 96'791'314.06 | 16'023'733.20 | +| benchmark independent task pattern with N tasks - 5000 | task generation | 358'568'136.00 | 512'098'930.33 | 53'032'970.90 | All numbers are in nanoseconds. From 85a98a80b2c71c82a95c28228fd988ca2ffd9e12 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Thu, 14 Sep 2023 15:03:53 +0200 Subject: [PATCH 17/17] Rename first+last iterator pairs to begin+end --- include/grid.h | 14 +++---- src/grid.cc | 112 ++++++++++++++++++++++++------------------------- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/include/grid.h b/include/grid.h index d84ac3b52..df3a3a8f8 100644 --- a/include/grid.h +++ b/include/grid.h @@ -39,8 +39,8 @@ region make_region(Params&&... 
args) { } template -int get_effective_dims(const InputIterator first, const InputIterator last) { - return std::accumulate(first, last, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_effective_dims()); }); +int get_effective_dims(const InputIterator begin, const InputIterator end) { + return std::accumulate(begin, end, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_effective_dims()); }); } } // namespace celerity::detail::grid_detail @@ -159,15 +159,15 @@ box bounding_box(const box& box1, const box& box2) { } template -auto bounding_box(InputIterator first, const InputIterator last) { +auto bounding_box(InputIterator begin, const InputIterator end) { using box_type = typename std::iterator_traits::value_type; - if(first == last) { + if(begin == end) { assert(box_type::dimensions > 0); // box<0> can never be empty return box_type(); } - const auto init = *first; - return std::accumulate(++first, last, init, bounding_box); + const auto init = *begin; + return std::accumulate(++begin, end, init, bounding_box); } template @@ -265,7 +265,7 @@ void dissect_box(const box& in_box, const std::vector -BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last); +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator begin, const BidirectionalIterator end); // forward-declaration for tests (explicitly instantiated) template diff --git a/src/grid.cc b/src/grid.cc index 42cc4155b..cc0098bef 100644 --- a/src/grid.cc +++ b/src/grid.cc @@ -43,33 +43,33 @@ bool box_covers(const box& top, const box& bottom) { // In a range of boxes that are identical in all dimensions except MergeDim, merge all connected boxes ("unconditional directional merge") template -BidirectionalIterator merge_connected_intervals(BidirectionalIterator first, BidirectionalIterator last) { +BidirectionalIterator merge_connected_intervals(BidirectionalIterator 
begin, BidirectionalIterator end) { using box_type = typename std::iterator_traits::value_type; - if(first == last || std::next(first) == last) return last; // common-case shortcut: no merge is possible + if(begin == end || std::next(begin) == end) return end; // common-case shortcut: no merge is possible // Sort by interval starting point - std::sort(first, last, [](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[MergeDim] < rhs.get_min()[MergeDim]; }); + std::sort(begin, end, [](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[MergeDim] < rhs.get_min()[MergeDim]; }); // The range is both read and written from left-to-right, avoiding repeated left-shifts for compaction - auto last_out = first; + auto out_end = begin; // Merge all connected boxes along MergeDim in O(N) by replacing each connected sequence with its bounding box - while(first != last) { - const auto merged_min = first->get_min(); - auto merged_max = first->get_max(); - for(++first; first != last && first->get_min()[MergeDim] <= merged_max[MergeDim]; ++first) { - merged_max[MergeDim] = std::max(merged_max[MergeDim], first->get_max()[MergeDim]); + while(begin != end) { + const auto merged_min = begin->get_min(); + auto merged_max = begin->get_max(); + for(++begin; begin != end && begin->get_min()[MergeDim] <= merged_max[MergeDim]; ++begin) { + merged_max[MergeDim] = std::max(merged_max[MergeDim], begin->get_max()[MergeDim]); } - *last_out++ = make_box(grid_detail::non_empty, merged_min, merged_max); + *out_end++ = make_box(grid_detail::non_empty, merged_min, merged_max); } - return last_out; + return out_end; } // In an arbitrary range of boxes, merge all boxes that are identical in all dimensions except MergeDim ("conditional directional merge"). 
template -BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last) { +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator begin, const BidirectionalIterator end) { using box_type = typename std::iterator_traits::value_type; static_assert(EffectiveDims <= box_type::dimensions); static_assert(MergeDim < EffectiveDims); @@ -87,56 +87,56 @@ BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterato }; if constexpr(EffectiveDims == 1) { - return merge_connected_intervals(first, last); + return merge_connected_intervals(begin, end); } else { - // partition [first, last) into sequences of boxes that are potentially mergeable wrt/ the dimensions orthogonal to MergeDim. + // partition [begin, end) into sequences of boxes that are potentially mergeable wrt/ the dimensions orthogonal to MergeDim. // This reduces complexity from O(n^3) to O(n log n) + O(m^3), where m is the longest mergeable sequence in that regard. - std::sort(first, last, orthogonal_to_merge_dim); + std::sort(begin, end, orthogonal_to_merge_dim); - // we want the result to be contiguous in [first, last_out), so in each iteration, we merge all boxes of a MergeDim-equal partition at their original + // we want the result to be contiguous in [begin, out_end), so in each iteration, we merge all boxes of a MergeDim-equal partition at their original // position in the iterator range; and then shift the merged range back to fill any gap left by merge of a previous partition. 
- auto last_out = first; + auto out_end = begin; - for(auto first_equal = first; first_equal != last;) { + for(auto equal_begin = begin; equal_begin != end;) { // O(n) std::find_if could be replaced by O(log n) std::partition_point, but we expect the number of "equal" elements to be small - const auto last_equal = std::find_if(std::next(first_equal), last, [&](const box_type& box) { - return orthogonal_to_merge_dim(*first_equal, box); // true if box is in a partition _after_ *first_equal + const auto equal_end = std::find_if(std::next(equal_begin), end, [&](const box_type& box) { + return orthogonal_to_merge_dim(*equal_begin, box); // true if box is in a partition _after_ *equal_begin }); - const auto last_merged = merge_connected_intervals(first_equal, last_equal); + const auto merged_end = merge_connected_intervals(equal_begin, equal_end); // shift the newly merged boxes to the left to close any gap opened by the merge of a previous partition - last_out = std::move(first_equal, last_merged, last_out); - first_equal = last_equal; + out_end = std::move(equal_begin, merged_end, out_end); + equal_begin = equal_end; } - return last_out; + return out_end; } } // explicit instantiations for tests (might otherwise be inlined) -template box_vector<1>::iterator merge_connected_boxes_along_dim<0, 1>(box_vector<1>::iterator first, box_vector<1>::iterator last); -template box_vector<2>::iterator merge_connected_boxes_along_dim<0, 2>(box_vector<2>::iterator first, box_vector<2>::iterator last); -template box_vector<2>::iterator merge_connected_boxes_along_dim<1, 2>(box_vector<2>::iterator first, box_vector<2>::iterator last); -template box_vector<3>::iterator merge_connected_boxes_along_dim<0, 3>(box_vector<3>::iterator first, box_vector<3>::iterator last); -template box_vector<3>::iterator merge_connected_boxes_along_dim<1, 3>(box_vector<3>::iterator first, box_vector<3>::iterator last); -template box_vector<3>::iterator merge_connected_boxes_along_dim<2, 
3>(box_vector<3>::iterator first, box_vector<3>::iterator last); +template box_vector<1>::iterator merge_connected_boxes_along_dim<0, 1>(box_vector<1>::iterator begin, box_vector<1>::iterator end); +template box_vector<2>::iterator merge_connected_boxes_along_dim<0, 2>(box_vector<2>::iterator begin, box_vector<2>::iterator end); +template box_vector<2>::iterator merge_connected_boxes_along_dim<1, 2>(box_vector<2>::iterator begin, box_vector<2>::iterator end); +template box_vector<3>::iterator merge_connected_boxes_along_dim<0, 3>(box_vector<3>::iterator begin, box_vector<3>::iterator end); +template box_vector<3>::iterator merge_connected_boxes_along_dim<1, 3>(box_vector<3>::iterator begin, box_vector<3>::iterator end); +template box_vector<3>::iterator merge_connected_boxes_along_dim<2, 3>(box_vector<3>::iterator begin, box_vector<3>::iterator end); // For higher-dimensional regions, the order in which dimensions are merged is relevant for the shape of the resulting box set. We merge along the last // ("fastest") dimension first to make sure the resulting boxes cover the largest possible extent of contiguous memory when are applied to buffers. template -BidirectionalIterator merge_connected_boxes_recurse(const BidirectionalIterator first, BidirectionalIterator last) { +BidirectionalIterator merge_connected_boxes_recurse(const BidirectionalIterator begin, BidirectionalIterator end) { static_assert(MergeDim >= 0 && MergeDim < EffectiveDims); - last = merge_connected_boxes_along_dim(first, last); - if constexpr(MergeDim > 0) { last = merge_connected_boxes_recurse(first, last); } - return last; + end = merge_connected_boxes_along_dim(begin, end); + if constexpr(MergeDim > 0) { end = merge_connected_boxes_recurse(begin, end); } + return end; } // Merge all adjacent boxes that are connected and identical in all except a single dimension. 
template -BidirectionalIterator merge_connected_boxes(const BidirectionalIterator first, BidirectionalIterator last) { +BidirectionalIterator merge_connected_boxes(const BidirectionalIterator begin, BidirectionalIterator end) { using box_type = typename std::iterator_traits::value_type; static_assert(EffectiveDims <= box_type::dimensions); - if constexpr(EffectiveDims > 0) { last = merge_connected_boxes_recurse(first, last); } - return last; + if constexpr(EffectiveDims > 0) { end = merge_connected_boxes_recurse(begin, end); } + return end; } // Split a box into parts according to dissection lines in `cuts`, where `cuts` is indexed by component dimension. This function is not generic @@ -185,30 +185,30 @@ template void dissect_box(const box<3>& in_box, const std::vector -void dissect_boxes(const InputIterator first, const InputIterator last, const std::vector>& cuts, +void dissect_boxes(const InputIterator begin, const InputIterator end, const std::vector>& cuts, box_vector::value_type::dimensions>& out_dissected) { if(!cuts.empty()) { - for(auto it = first; it != last; ++it) { + for(auto it = begin; it != end; ++it) { dissect_box(*it, cuts, out_dissected, 0); } } else { - out_dissected.insert(out_dissected.end(), first, last); + out_dissected.insert(out_dissected.end(), begin, end); } } // Collect the sorted, unique list of box start- and end points along a single dimension. These can then be used in dissect_boxes. 
template -std::vector collect_dissection_lines(const InputIterator first, const InputIterator last, int dim) { +std::vector collect_dissection_lines(const InputIterator begin, const InputIterator end, int dim) { std::vector cuts; // allocating 2*N integers might seem wasteful, but this has negligible runtime in the profiler and is already algorithmically optimal at O(N log N) - cuts.reserve(std::distance(first, last) * 2); - for(auto it = first; it != last; ++it) { + cuts.reserve(std::distance(begin, end) * 2); + for(auto it = begin; it != end; ++it) { cuts.push_back(it->get_min()[dim]); cuts.push_back(it->get_max()[dim]); } std::sort(cuts.begin(), cuts.end()); cuts.erase(std::unique(cuts.begin(), cuts.end()), cuts.end()); - assert(first == last || cuts.size() >= 2); + assert(begin == end || cuts.size() >= 2); return cuts; } @@ -308,18 +308,18 @@ region region_intersection_impl(const region& lhs, con } // No dissection step is necessary as the intersection of two normalized tilings is already "maximally mergeable". - const auto first = intersection.begin(); - auto last = intersection.end(); - last = grid_detail::merge_connected_boxes(first, last); + const auto begin = intersection.begin(); + auto end = intersection.end(); + end = grid_detail::merge_connected_boxes(begin, end); // intersected_boxes retains the sorting from lhs, but for Dims > 1, the intersection can shift min-points such that the box_coordinate_order reverses. 
if constexpr(EffectiveDims > 1) { - std::sort(first, last, box_coordinate_order()); + std::sort(begin, end, box_coordinate_order()); } else { - assert(std::is_sorted(first, last, box_coordinate_order())); + assert(std::is_sorted(begin, end, box_coordinate_order())); } - intersection.erase(last, intersection.end()); + intersection.erase(end, intersection.end()); return grid_detail::make_region(grid_detail::normalized, std::move(intersection)); } @@ -330,12 +330,12 @@ void apply_region_difference(box_vector& dissected_left, const regi // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs. // For further optimization potential see the comments on region_intersection_impl. - const auto first_left = dissected_left.begin(); - auto last_left = dissected_left.end(); + const auto left_begin = dissected_left.begin(); + auto left_end = dissected_left.end(); for(const auto& right : rhs.get_boxes()) { - for(auto left_it = first_left; left_it != last_left;) { + for(auto left_it = left_begin; left_it != left_end;) { if(grid_detail::box_covers(right, *left_it)) { - *left_it = *--last_left; + *left_it = *--left_end; } else { ++left_it; } @@ -343,8 +343,8 @@ void apply_region_difference(box_vector& dissected_left, const regi } // merge the now non-overlapping boxes - last_left = grid_detail::merge_connected_boxes(first_left, last_left); - dissected_left.erase(last_left, dissected_left.end()); + left_end = grid_detail::merge_connected_boxes(left_begin, left_end); + dissected_left.erase(left_end, dissected_left.end()); } } // namespace celerity::detail::grid_detail