Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize chained hyperslab selection. #1031

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 108 additions & 14 deletions include/highfive/bits/H5Slice_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,20 +162,7 @@ class HyperSlab {
}

/// Apply the recorded chain of selections to a copy of `space_`.
///
/// The work is delegated to `apply_impl`, which clones `space_` and then
/// applies every entry of `selects` in order. `space_` itself is not
/// modified.
///
/// \param space_ the dataspace the selections are applied to.
/// \return the cloned dataspace carrying the resulting selection.
DataSpace apply(const DataSpace& space_) const {
    return apply_impl(space_);
}

private:
Expand Down Expand Up @@ -229,6 +216,113 @@ class HyperSlab {
};

std::vector<Select_> selects;

protected:
/// Clone `outer_space` and run `detail::h5s_select_none` on the clone.
///
/// \param outer_space the dataspace to copy; it is left untouched.
/// \return the clone, after `h5s_select_none` has been applied to it.
DataSpace select_none(const DataSpace& outer_space) const {
    auto emptied = outer_space.clone();
    const auto emptied_id = emptied.getId();
    detail::h5s_select_none(emptied_id);
    return emptied;
}

/// Apply a single selection `sel` to `space` in place.
///
/// Each of `offset`, `stride`, `count` and `block` is forwarded as a pointer
/// to its data, or as `nullptr` when the corresponding container is empty.
///
/// \param space the dataspace whose selection is modified.
/// \param sel   the selection (operator plus slab description) to apply.
void select_hyperslab(DataSpace& space, const Select_& sel) const {
    const auto offset_ptr = sel.offset.empty() ? nullptr : sel.offset.data();
    const auto stride_ptr = sel.stride.empty() ? nullptr : sel.stride.data();
    const auto count_ptr = sel.count.empty() ? nullptr : sel.count.data();
    const auto block_ptr = sel.block.empty() ? nullptr : sel.block.data();

    detail::h5s_select_hyperslab(
        space.getId(), convert(sel.op), offset_ptr, stride_ptr, count_ptr, block_ptr);
}

#if H5_VERSION_GE(1, 10, 6)
/// The length of a streak of `op` (always `Op::Or`) starting at `begin`.
///
/// Counts the consecutive elements of `[begin, end)`, starting at `begin`,
/// whose operator equals `op`. Returns 0 if `begin == end` or if the very
/// first element already uses a different operator.
size_t detect_streak(Select_ const* begin, Select_ const* end, Op op) const {
    assert(op == Op::Or);

    size_t length = 0;
    for (Select_ const* cursor = begin; cursor != end && cursor->op == op; ++cursor) {
        ++length;
    }
    return length;
}

/// Combine the selections of two dataspaces into a new dataspace.
///
/// \param left_space  first operand of the combination.
/// \param op          the operator, translated with `convert` before use.
/// \param right_space second operand of the combination.
/// \return a `DataSpace` wrapping the id returned by HDF5.
DataSpace combine_selections(const DataSpace& left_space,
                             Op op,
                             const DataSpace& right_space) const {
    // Use the checked wrapper rather than calling `H5Scombine_select`
    // directly: on `H5I_INVALID_HID` the wrapper reports the failure through
    // `HDF5ErrMapper::ToException<DataSpaceException>` instead of letting an
    // invalid id be wrapped in a `DataSpace`.
    return detail::make_data_space(
        detail::h5s_combine_select(left_space.getId(), convert(op), right_space.getId()));
}

/// Reduce a sequence of `Op::Or` efficiently.
///
/// The issue is that `H5Sselect_hyperslab` runs in time that is linear in
/// the number of blocks in the existing selection. Therefore, a loop that
/// adds slab-by-slab has quadratic runtime in the number of slabs.
///
/// Fortunately, `H5Scombine_select` doesn't suffer from the same problem.
/// However, it's only available in 1.10.6 and newer.
///
/// The solution is to use divide-and-conquer to reduce (long) streaks of
/// `Op::Or` in what seems to be log-linear time.
///
/// \param outer_space the dataspace every partial selection is cloned from.
/// \param begin       first selection of the streak.
/// \param end         one past the last selection of the streak.
/// \param op          the operator of the streak; must be `Op::Or`.
/// \return a dataspace whose selection is the combination of `[begin, end)`.
/// \throws std::runtime_error if the range `[begin, end)` is empty.
DataSpace reduce_streak(const DataSpace& outer_space,
                        Select_ const* begin,
                        Select_ const* end,
                        Op op) const {
    assert(op == Op::Or);

    if (begin == end) {
        throw std::runtime_error("Broken logic in 'HyperSlab::reduce_streak'.");
    }

    const std::ptrdiff_t distance = end - begin;
    if (distance == 1) {
        // Base case: start from an empty selection and apply the single slab.
        auto space = select_none(outer_space);
        select_hyperslab(space, *begin);
        return space;
    }

    // Split the streak in two halves and reduce each one recursively. The
    // left operand is the first half `[begin, mid)`, the right operand the
    // second half `[mid, end)`; since `op` is a union the operand order does
    // not affect the resulting selection.
    Select_ const* mid = begin + distance / 2;
    auto left_space = reduce_streak(outer_space, begin, mid, op);
    auto right_space = reduce_streak(outer_space, mid, end, op);

    return combine_selections(left_space, op, right_space);
}

/// Apply all entries of `selects`, in order, to a clone of `space_`.
///
/// Runs of more than one consecutive `Op::Or` are first reduced to a single
/// dataspace with `reduce_streak` and then merged into the running result in
/// one step; every other entry is applied individually.
///
/// \param space_ the dataspace the selections refer to; it is not modified.
/// \return the clone of `space_` carrying the final selection.
DataSpace apply_impl(const DataSpace& space_) const {
    auto result = space_.clone();

    Select_ const* const last = selects.data() + selects.size();
    Select_ const* cursor = selects.data();

    while (cursor != last) {
        const auto streak_length = detect_streak(cursor, last, Op::Or);

        if (streak_length <= 1) {
            // No streak worth reducing: handle this one entry on its own.
            if (cursor->op == Op::None) {
                detail::h5s_select_none(result.getId());
            } else {
                select_hyperslab(result, *cursor);
            }
            ++cursor;
            continue;
        }

        auto streak_space = reduce_streak(space_, cursor, cursor + streak_length, Op::Or);

        // Since HDF5 doesn't allow `combine_selections` with a None
        // selection, we need to avoid the issue:
        const bool nothing_selected = detail::h5s_get_select_type(result.getId()) ==
                                      H5S_SEL_NONE;
        if (nothing_selected) {
            result = streak_space;
        } else {
            result = combine_selections(result, Op::Or, streak_space);
        }

        // Skip past the whole streak that was just merged.
        cursor += streak_length;
    }

    return result;
}
#else
/// Apply all entries of `selects`, one by one and in order, to a clone of
/// `space_`.
///
/// \param space_ the dataspace the selections refer to; it is not modified.
/// \return the clone of `space_` carrying the final selection.
DataSpace apply_impl(const DataSpace& space_) const {
    auto result = space_.clone();

    for (auto it = selects.begin(); it != selects.end(); ++it) {
        if (it->op != Op::None) {
            select_hyperslab(result, *it);
            continue;
        }
        // `Op::None` carries no slab; it only resets the selection.
        detail::h5s_select_none(result.getId());
    }

    return result;
}
#endif
};

///
Expand Down
21 changes: 21 additions & 0 deletions include/highfive/bits/h5s_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <H5Ipublic.h>
#include <H5Spublic.h>
namespace HighFive {
namespace detail {
Expand Down Expand Up @@ -110,6 +111,26 @@ inline H5S_class_t h5s_get_simple_extent_type(hid_t space_id) {
return cls;
}

/// Checked wrapper around `H5Sget_select_type`.
///
/// \param space_id id of the dataspace to query.
/// \return the selection type reported by HDF5.
///
/// A negative result is reported through
/// `HDF5ErrMapper::ToException<DataSpaceException>`.
inline H5S_sel_type h5s_get_select_type(hid_t space_id) {
    const H5S_sel_type selection_type = H5Sget_select_type(space_id);

    const bool failed = selection_type < 0;
    if (failed) {
        HDF5ErrMapper::ToException<DataSpaceException>("Unable to get type of selection.");
    }

    return selection_type;
}

#if H5_VERSION_GE(1, 10, 6)
/// Checked wrapper around `H5Scombine_select`.
///
/// \param space1_id id of the first dataspace.
/// \param op        the selection operator forwarded to HDF5.
/// \param space2_id id of the second dataspace.
/// \return the id returned by `H5Scombine_select`.
///
/// A result equal to `H5I_INVALID_HID` is reported through
/// `HDF5ErrMapper::ToException<DataSpaceException>`.
inline hid_t h5s_combine_select(hid_t space1_id, H5S_seloper_t op, hid_t space2_id) {
    const auto combined_id = H5Scombine_select(space1_id, op, space2_id);

    if (H5I_INVALID_HID == combined_id) {
        HDF5ErrMapper::ToException<DataSpaceException>("Unable to combine two selections.");
    }

    return combined_id;
}
#endif


} // namespace detail
} // namespace HighFive
57 changes: 57 additions & 0 deletions tests/unit/test_high_five_selection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <highfive/highfive.hpp>
#include "tests_high_five.hpp"
#include "data_generator.hpp"

using namespace HighFive;
using Catch::Matchers::Equals;
Expand Down Expand Up @@ -534,3 +535,59 @@ void irregularHyperSlabSelectionWriteTest() {
TEMPLATE_LIST_TEST_CASE("irregularHyperSlabSelectionWrite", "[template]", std::tuple<int>) {
irregularHyperSlabSelectionWriteTest<TestType>();
}

// Builds a hyperslab out of `nsel` slabs chained with `|=` (one row each,
// three columns wide, column offset `i % 10`) and checks that reading through
// it yields exactly the expected elements, both for the pure chain and for the
// chain intersected (`&=`) with one additional slab.
TEST_CASE("select_multiple_ors", "[hyperslab]") {
    const size_t n = 100;
    const size_t m = 20;
    const size_t nsel = 30;
    auto x = testing::DataGenerator<std::vector<std::vector<int>>>::create({n, m});

    auto file = File("select_multiple_ors.h5", File::Truncate);
    auto dset = file.createDataSet("x", x);

    // `indices` records, in selection order, every (row, column) pair that
    // the chained hyperslab is expected to contain.
    std::vector<std::array<size_t, 2>> indices;
    auto hyperslab = HyperSlab();
    for (size_t i = 0; i < nsel; ++i) {
        std::vector<size_t> offsets{i, i % 10};
        std::vector<size_t> counts{1, 3};
        hyperslab |= RegularHyperSlab(offsets, counts);

        for (size_t k = 0; k < counts[1]; ++k) {
            indices.push_back({offsets[0], offsets[1] + k});
        }
    }

    SECTION("Pure Or Chain") {
        auto selected = dset.select(hyperslab).read<std::vector<int>>();
        REQUIRE(selected.size() == indices.size());
        for (size_t k = 0; k < selected.size(); ++k) {
            size_t i = indices[k][0];
            size_t j = indices[k][1];
            REQUIRE(selected[k] == x[i][j]);
        }
    }

    SECTION("Or Chain And Slab") {
        std::vector<size_t> offsets{5, 2};
        std::vector<size_t> counts{85, 12};

        // Exclusive upper corner of the additional slab; it does not depend
        // on the loop variable, so compute it once.
        const std::array<size_t, 2> ij_max = {offsets[0] + counts[0], offsets[1] + counts[1]};

        // Keep only the expected indices that also fall inside the slab.
        std::vector<std::array<size_t, 2>> selected_indices;
        for (const auto& ij: indices) {
            if (offsets[0] <= ij[0] && ij[0] < ij_max[0] && offsets[1] <= ij[1] &&
                ij[1] < ij_max[1]) {
                selected_indices.push_back(ij);
            }
        }

        hyperslab &= RegularHyperSlab(offsets, counts);

        auto selected = dset.select(hyperslab).read<std::vector<int>>();
        REQUIRE(selected.size() == selected_indices.size());
        for (size_t k = 0; k < selected.size(); ++k) {
            size_t i = selected_indices[k][0];
            size_t j = selected_indices[k][1];
            REQUIRE(selected[k] == x[i][j]);
        }
    }
}
Loading