diff --git a/CHANGELOG.md b/CHANGELOG.md index 50bfe300..37a4129f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Improved performance of window queries by executing them partially as point queries. This works best for point datasets, and somewhat for box datasets with "include" queries. There is no benefit for "intersection" queries. [#88](https://github.com/tzaeschke/phtree-cpp/issues/88) -- Improved benchmarks for insert and query to use a more compact format. [91](https://github.com/tzaeschke/phtree-cpp/pull/91) +- Improved benchmarks for insert and query to use a more compact format. [#91](https://github.com/tzaeschke/phtree-cpp/pull/91) +- Improved performance of window queries by optimizing calculation of min/max masks. + Improved performance of queries and updates by changing bit-width of min/max masks and + hc_pos_t. [#95](https://github.com/tzaeschke/phtree-cpp/pull/95) ### Removed - bazel version requirement file `.bazelversion`. [#89](https://github.com/tzaeschke/phtree-cpp/issues/89) diff --git a/benchmark/hd_query_d_benchmark.cc b/benchmark/hd_query_d_benchmark.cc index ac2ac82a..25f268ce 100644 --- a/benchmark/hd_query_d_benchmark.cc +++ b/benchmark/hd_query_d_benchmark.cc @@ -172,6 +172,18 @@ void PhTree6D_FE(benchmark::State& state, Arguments&&...) { benchmark.Benchmark(state); } +template +void PhTree10D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<10, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} + +template +void PhTree20D_FE(benchmark::State& state, Arguments&&...) { + IndexBenchmark<20, MIN_MAX_FOR_EACH> benchmark{state}; + benchmark.Benchmark(state); +} + template void PhTree6D_IT(benchmark::State& state, Arguments&&...) { IndexBenchmark<6, MIN_MAX_ITER> benchmark{state}; @@ -196,6 +208,16 @@ BENCHMARK_CAPTURE(PhTree6D_FE, WQ, 0) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) ->Unit(benchmark::kMillisecond); +BENCHMARK_CAPTURE(PhTree10D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree20D_FE, WQ, 0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) + ->Unit(benchmark::kMillisecond); + BENCHMARK_CAPTURE(PhTree6D_IT, WQ, 0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CLUSTER, TestGenerator::CUBE}}) diff --git a/include/phtree/common/base_types.h b/include/phtree/common/base_types.h index a95a721b..dcb91dac 100644 --- a/include/phtree/common/base_types.h +++ b/include/phtree/common/base_types.h @@ -55,7 +55,15 @@ using bit_mask_t = typename std::make_unsigned::type; template static constexpr bit_mask_t MAX_MASK = std::numeric_limits>::max(); using dimension_t = size_t; // Number of dimensions -using hc_pos_t = uint64_t; +// We have two types that represent hypercube addresses (HC position). +// The hc_pos_dim_t uses a template parameter to determine how many bits are needed, this is either +// 32bit or 64bit. This parameter is used where HC positions are stored because benchmarks show a +// difference in performance when this is used. +// The hc_pos_64_t type is always set to 64. It is used where computations play a role that appear +// to prefer being in always 64bit, mainly in CalcPosInArray() and in Node. +template +using hc_pos_dim_t = std::conditional_t<(DIM < 32), uint32_t, uint64_t>; +using hc_pos_64_t = uint64_t; // ************************************************************************ // Basic structs and classes diff --git a/include/phtree/common/common.h b/include/phtree/common/common.h index 638d0e0a..92fb40a0 100644 --- a/include/phtree/common/common.h +++ b/include/phtree/common/common.h @@ -49,7 +49,7 @@ namespace improbable::phtree { * an array. */ template -static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { +static hc_pos_64_t CalcPosInArray(const PhPoint& valSet, bit_width_t postfix_len) { // n=DIM, i={0..n-1} // i = 0 : |0|1|0|1|0|1|0|1| // i = 1 : | 0 | 1 | 0 | 1 | @@ -58,7 +58,7 @@ static hc_pos_t CalcPosInArray(const PhPoint& valSet, bit_width_t p // Following formula was for inverse ordering of current ordering... // pos = sum (i=1..n, len/2^i) = sum (..., 2^(n-i)) bit_mask_t valMask = bit_mask_t(1) << postfix_len; - hc_pos_t pos = 0; + hc_pos_64_t pos = 0; for (dimension_t i = 0; i < DIM; ++i) { pos <<= 1; // set pos-bit if bit is set in value diff --git a/include/phtree/common/flat_sparse_map.h b/include/phtree/common/flat_sparse_map.h index f822d3d8..15bf9eac 100644 --- a/include/phtree/common/flat_sparse_map.h +++ b/include/phtree/common/flat_sparse_map.h @@ -30,27 +30,22 @@ */ namespace improbable::phtree { -namespace { -template -using PhSparseMapPair = std::pair; - -using index_t = std::int32_t; -} // namespace - /* * The sparse_map is a flat map implementation that uses an array of *at* *most* SIZE=2^DIM. * The array contains a list sorted by key. * * It has O(log n) lookup and O(n) insertion/removal time complexity, space complexity is O(n). */ -template +template class sparse_map { + using Entry = std::pair; + public: explicit sparse_map() : data_{} { data_.reserve(4); } - [[nodiscard]] auto find(size_t key) { + [[nodiscard]] auto find(KeyT key) { auto it = lower_bound(key); if (it != data_.end() && it->first == key) { return it; @@ -58,7 +53,7 @@ class sparse_map { return data_.end(); } - [[nodiscard]] auto find(size_t key) const { + [[nodiscard]] auto find(KeyT key) const { auto it = lower_bound(key); if (it != data_.end() && it->first == key) { return it; @@ -66,16 +61,15 @@ class sparse_map { return data_.end(); } - [[nodiscard]] auto lower_bound(size_t key) { - return std::lower_bound( - data_.begin(), data_.end(), key, [](PhSparseMapPair& left, const size_t key) { - return left.first < key; - }); + [[nodiscard]] auto lower_bound(KeyT key) { + return std::lower_bound(data_.begin(), data_.end(), key, [](Entry& left, const KeyT key) { + return left.first < key; + }); } - [[nodiscard]] auto lower_bound(size_t key) const { + [[nodiscard]] auto lower_bound(KeyT key) const { return std::lower_bound( - data_.cbegin(), data_.cend(), key, [](const PhSparseMapPair& left, const size_t key) { + data_.cbegin(), data_.cend(), key, [](const Entry& left, const KeyT key) { return left.first < key; }); } @@ -110,14 +104,14 @@ class sparse_map { return try_emplace_base(key, std::forward(args)...); } - void erase(size_t key) { + void erase(KeyT key) { auto it = lower_bound(key); if (it != end() && it->first == key) { data_.erase(it); } } - void erase(const typename std::vector>::iterator& iterator) { + void erase(const typename std::vector::iterator& iterator) { data_.erase(iterator); } @@ -127,7 +121,7 @@ class sparse_map { private: template - auto emplace_base(size_t key, Args&&... args) { + auto emplace_base(KeyT key, Args&&... args) { auto it = lower_bound(key); if (it != end() && it->first == key) { return std::make_pair(it, false); @@ -137,7 +131,7 @@ class sparse_map { } template - auto try_emplace_base(size_t key, Args&&... args) { + auto try_emplace_base(KeyT key, Args&&... args) { auto it = lower_bound(key); if (it != end() && it->first == key) { return std::make_pair(it, false); @@ -151,7 +145,7 @@ class sparse_map { } } - std::vector> data_; + std::vector data_; }; } // namespace improbable::phtree diff --git a/include/phtree/v16/debug_helper_v16.h b/include/phtree/v16/debug_helper_v16.h index bb62942f..017c8a54 100644 --- a/include/phtree/v16/debug_helper_v16.h +++ b/include/phtree/v16/debug_helper_v16.h @@ -17,9 +17,9 @@ #ifndef PHTREE_V16_DEBUG_HELPER_H #define PHTREE_V16_DEBUG_HELPER_H +#include "node.h" #include "phtree/common/common.h" #include "phtree/common/debug_helper.h" -#include "node.h" #include "phtree_v16.h" #include @@ -99,7 +99,7 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { bit_width_t current_depth, const EntryT& entry, const bit_width_t parent_postfix_len, - bool printValue) const { + bool print_value) const { std::string ind = "*"; for (bit_width_t i = 0; i < current_depth; ++i) { ind += "-"; @@ -127,15 +127,15 @@ class DebugHelperV16 : public PhTreeDebugHelper::DebugHelper { // To clean previous postfixes. for (auto& it : node.Entries()) { const auto& child = it.second; - hc_pos_t hcPos = it.first; + auto hc_pos = it.first; if (child.IsNode()) { - sb << ind << "# " << hcPos << " Node: " << std::endl; - ToStringTree(sb, current_depth + 1, child, postfix_len, printValue); + sb << ind << "# " << hc_pos << " Node: " << std::endl; + ToStringTree(sb, current_depth + 1, child, postfix_len, print_value); } else { // post-fix sb << ind << ToBinary(child.GetKey()); - sb << " hcPos=" << hcPos; - if (printValue) { + sb << " hcPos=" << hc_pos; + if (print_value) { sb << " v=" << (child.IsValue() ? "T" : "null"); } sb << std::endl; diff --git a/include/phtree/v16/entry.h b/include/phtree/v16/entry.h index 6b2a2dbf..7f0acf97 100644 --- a/include/phtree/v16/entry.h +++ b/include/phtree/v16/entry.h @@ -125,6 +125,26 @@ class Entry { DestroyUnion(); } + void SetNodeCenter() { + // The node center is defined as the prefix + a '1' bit after the prefix. The remaining + // bits, i.e. all post_len bits must be '0'. + // This is required for window queries which would otherwise need to calculate the + // center each time they traverse a node. + assert(union_type_ == NODE); + bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len_; + bit_mask_t maskVT = MAX_MASK << postfix_len_; + // to prevent problems with signed long when using 64 bit + if (postfix_len_ < MAX_BIT_WIDTH - 1) { + for (dimension_t i = 0; i < DIM; ++i) { + kd_key_[i] = (kd_key_[i] | maskHcBit) & maskVT; + } + } else { + for (dimension_t i = 0; i < DIM; ++i) { + kd_key_[i] = 0; + } + } + } + [[nodiscard]] const KeyT& GetKey() const { return kd_key_; } @@ -148,6 +168,7 @@ class Entry { } void SetKey(const KeyT& key) noexcept { + assert(union_type_ == VALUE); // Do we have any other use? kd_key_ = key; } @@ -157,6 +178,7 @@ class Entry { union_type_ = NODE; new (&node_) std::unique_ptr{std::move(node)}; assert(!node); + SetNodeCenter(); } [[nodiscard]] bit_width_t GetNodePostfixLen() const noexcept { @@ -194,6 +216,9 @@ class Entry { union_type_ = EMPTY; *this = std::move(other); node.reset(); + if (IsNode()) { + SetNodeCenter(); + } } private: diff --git a/include/phtree/v16/for_each_hc.h b/include/phtree/v16/for_each_hc.h index 4500d76d..ef2a7c6d 100644 --- a/include/phtree/v16/for_each_hc.h +++ b/include/phtree/v16/for_each_hc.h @@ -40,6 +40,7 @@ class ForEachHC { using KeyInternal = typename CONVERT::KeyInternal; using SCALAR = typename CONVERT::ScalarInternal; using EntryT = Entry; + using hc_pos_t = hc_pos_dim_t; public: template @@ -64,7 +65,6 @@ class ForEachHC { auto postfix_len = entry.GetNodePostfixLen(); auto end = entries.end(); auto iter = opt_it != nullptr && *opt_it != end ? *opt_it : entries.lower_bound(mask_lower); - //auto iter = opt_it != nullptr ? *opt_it : entries.lower_bound(mask_lower); for (; iter != end && iter->first <= mask_upper; ++iter) { auto child_hc_pos = iter->first; // Use bit-mask magic to check whether we are in a valid quadrant. @@ -94,18 +94,17 @@ class ForEachHC { // An infix with len=0 implies that at least part of the child node overlaps with the query, // otherwise the bit mask checking would have returned 'false'. // Putting it differently, if the infix has len=0, then there is no point in validating it. + bool mismatch = false; if (entry.HasNodeInfix(parent_postfix_len)) { // Mask for comparing the prefix with the query boundaries. assert(entry.GetNodePostfixLen() + 1 < MAX_BIT_WIDTH); SCALAR comparison_mask = MAX_MASK << (entry.GetNodePostfixLen() + 1); for (dimension_t dim = 0; dim < DIM; ++dim) { SCALAR prefix = key[dim] & comparison_mask; - if (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)) { - return false; - } + mismatch |= (prefix > range_max_[dim] || prefix < (range_min_[dim] & comparison_mask)); } } - return filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); + return mismatch ? false : filter_.IsNodeValid(key, entry.GetNodePostfixLen() + 1); } void CalcLimits( @@ -127,23 +126,17 @@ class ForEachHC { // query higher || NO YES // assert(postfix_len < MAX_BIT_WIDTH); - bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; - bit_mask_t maskVT = MAX_MASK << postfix_len; - constexpr hc_pos_t ONE = 1; // to prevent problems with signed long when using 64 bit if (postfix_len < MAX_BIT_WIDTH - 1) { for (dimension_t i = 0; i < DIM; ++i) { lower_limit <<= 1; + //==> set to 1 if lower value should not be queried + lower_limit |= range_min_[i] >= prefix[i]; + } + for (dimension_t i = 0; i < DIM; ++i) { upper_limit <<= 1; - SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; - if (range_min_[i] >= nodeBisection) { - //==> set to 1 if lower value should not be queried - lower_limit |= ONE; - } - if (range_max_[i] >= nodeBisection) { - // Leave 0 if higher value should not be queried. - upper_limit |= ONE; - } + // Leave 0 if higher value should not be queried. + upper_limit |= range_max_[i] >= prefix[i]; } } else { // special treatment for signed longs @@ -152,20 +145,18 @@ class ForEachHC { // The hypercube assumes that a leading '0' indicates a lower value. // Solution: We leave HC as it is. for (dimension_t i = 0; i < DIM; ++i) { - lower_limit <<= 1; upper_limit <<= 1; - if (range_min_[i] < 0) { - // If minimum is positive, we don't need the search negative values - //==> set upper_limit to 0, prevent searching values starting with '1'. - upper_limit |= ONE; - } - if (range_max_[i] < 0) { - // Leave 0 if higher value should not be queried - // If maximum is negative, we do not need to search positive values - //(starting with '0'). - //--> lower_limit = '1' - lower_limit |= ONE; - } + // If minimum is positive, we don't need the search negative values + //==> set upper_limit to 0, prevent searching values starting with '1'. + upper_limit |= range_min_[i] < 0; + } + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + // Leave 0 if higher value should not be queried + // If maximum is negative, we do not need to search positive values + //(starting with '0'). + //--> lower_limit = '1' + lower_limit |= range_max_[i] < 0; } } } diff --git a/include/phtree/v16/iterator_hc.h b/include/phtree/v16/iterator_hc.h index cd71794a..9c9f8f03 100644 --- a/include/phtree/v16/iterator_hc.h +++ b/include/phtree/v16/iterator_hc.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_ITERATOR_HC_H #define PHTREE_V16_ITERATOR_HC_H -#include "phtree/common/common.h" #include "iterator_with_parent.h" +#include "phtree/common/common.h" namespace improbable::phtree::v16 { @@ -134,6 +134,7 @@ class NodeIterator { using KeyT = PhPoint; using EntryT = Entry; using EntriesT = EntryMap; + using hc_pos_t = hc_pos_dim_t; public: NodeIterator() : iter_{}, entries_{nullptr}, mask_lower_{0}, mask_upper_{0}, postfix_len_{0} {} @@ -210,25 +211,19 @@ class NodeIterator { // query higher || NO YES // assert(postfix_len < MAX_BIT_WIDTH); - bit_mask_t maskHcBit = bit_mask_t(1) << postfix_len; - bit_mask_t maskVT = MAX_MASK << postfix_len; hc_pos_t lower_limit = 0; hc_pos_t upper_limit = 0; - constexpr hc_pos_t ONE = 1; // to prevent problems with signed long when using 64 bit if (postfix_len < MAX_BIT_WIDTH - 1) { for (dimension_t i = 0; i < DIM; ++i) { lower_limit <<= 1; + //==> set to 1 if lower value should not be queried + lower_limit |= range_min[i] >= prefix[i]; + } + for (dimension_t i = 0; i < DIM; ++i) { upper_limit <<= 1; - SCALAR nodeBisection = (prefix[i] | maskHcBit) & maskVT; - if (range_min[i] >= nodeBisection) { - //==> set to 1 if lower value should not be queried - lower_limit |= ONE; - } - if (range_max[i] >= nodeBisection) { - // Leave 0 if higher value should not be queried. - upper_limit |= ONE; - } + // Leave 0 if higher value should not be queried. + upper_limit |= range_max[i] >= prefix[i]; } } else { // special treatment for signed longs @@ -236,22 +231,19 @@ class NodeIterator { // LOWER value, opposed to indicating a HIGHER value as in the remaining 63 bits. // The hypercube assumes that a leading '0' indicates a lower value. // Solution: We leave HC as it is. - for (dimension_t i = 0; i < DIM; ++i) { - lower_limit <<= 1; upper_limit <<= 1; - if (range_min[i] < 0) { - // If minimum is positive, we don't need the search negative values - //==> set upper_limit to 0, prevent searching values starting with '1'. - upper_limit |= ONE; - } - if (range_max[i] < 0) { - // Leave 0 if higher value should not be queried - // If maximum is negative, we do not need to search positive values - //(starting with '0'). - //--> lower_limit = '1' - lower_limit |= ONE; - } + // If minimum is positive, we don't need the search negative values + //==> set upper_limit to 0, prevent searching values starting with '1'. + upper_limit |= range_min[i] < 0; + } + for (dimension_t i = 0; i < DIM; ++i) { + lower_limit <<= 1; + // Leave 0 if higher value should not be queried + // If maximum is negative, we do not need to search positive values + //(starting with '0'). + //--> lower_limit = '1' + lower_limit |= range_max[i] < 0; } } mask_lower_ = lower_limit; diff --git a/include/phtree/v16/iterator_knn_hs.h b/include/phtree/v16/iterator_knn_hs.h index 5af0902e..7d1b7195 100644 --- a/include/phtree/v16/iterator_knn_hs.h +++ b/include/phtree/v16/iterator_knn_hs.h @@ -17,8 +17,8 @@ #ifndef PHTREE_V16_QUERY_KNN_HS_H #define PHTREE_V16_QUERY_KNN_HS_H -#include "phtree/common/common.h" #include "iterator_base.h" +#include "phtree/common/common.h" #include namespace improbable::phtree::v16 { @@ -134,11 +134,9 @@ class IteratorKnnHS : public IteratorWithFilter { SCALAR mask_min = MAX_MASK << bits_to_ignore; SCALAR mask_max = ~mask_min; KeyInternal buf; - // The following calculates the point inside of the node that is closest to center_. - // If center is inside the node this returns center_, otherwise it finds a point on the - // node's surface. + // The following calculates the point inside the node that is closest to center_. for (dimension_t i = 0; i < DIM; ++i) { - // if center_[i] is outside the node, return distance to closest edge, + // if center_[i] is outside the node, return distance to the closest edge, // otherwise return center_[i] itself (assume possible distance=0) SCALAR min = prefix[i] & mask_min; SCALAR max = prefix[i] | mask_max; diff --git a/include/phtree/v16/node.h b/include/phtree/v16/node.h index 765ef69a..641e4941 100644 --- a/include/phtree/v16/node.h +++ b/include/phtree/v16/node.h @@ -33,7 +33,7 @@ namespace improbable::phtree::v16 { * - `array_map` is the fastest, but has O(2^DIM) space complexity. This can be very wasteful * because many nodes may have only 2 entries. * Also, iteration depends on some bit operations and is also O(DIM) per step if the CPU/compiler - * does not support CTZ (count trailing bits). + * does not support CTZ (count trailing zeroes). * - `sparse_map` is slower, but requires only O(n) memory (n = number of entries/children). * However, insertion/deletion is O(n), i.e. O(2^DIM) time complexity in the worst case. * - 'b_plus_tree_map` is the least efficient for small node sizes but scales best with larger @@ -42,10 +42,11 @@ namespace improbable::phtree::v16 { template using EntryMap = typename std::conditional< DIM <= 3, - array_map, - typename std:: - conditional, b_plus_tree_map>:: - type>::type; + array_map, + typename std::conditional< + DIM <= 8, + sparse_map, Entry>, + b_plus_tree_map>::type>::type; template using EntryIterator = decltype(EntryMap().begin()); @@ -75,6 +76,7 @@ template class Node { using KeyT = PhPoint; using EntryT = Entry; + using hc_pos_t = hc_pos_64_t; public: Node() : entries_{} {} @@ -251,6 +253,20 @@ class Node { ++num_entries_local; } } + + // Check node center + auto post_len = current_entry.GetNodePostfixLen(); + if (post_len == MAX_BIT_WIDTH - 1) { + for (auto d : current_entry.GetKey()) { + assert(d == 0); + } + } else { + for (auto d : current_entry.GetKey()) { + assert(((d >> post_len) & 0x1) == 1 && "Last bit of node center must be `1`"); + assert(((d >> post_len) << post_len) == d && "postlen bits must all be `0`"); + } + } + return num_entries_local + num_entries_children; } diff --git a/test/common/flat_sparse_map_test.cc b/test/common/flat_sparse_map_test.cc index 99d581d7..b4c3adc3 100644 --- a/test/common/flat_sparse_map_test.cc +++ b/test/common/flat_sparse_map_test.cc @@ -27,7 +27,7 @@ TEST(PhTreeFlatSparseMapTest, SmokeTest) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - sparse_map test_map; + sparse_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine); @@ -61,7 +61,7 @@ TEST(PhTreeFlatSparseMapTest, SmokeTestWithTryEmplace) { std::uniform_int_distribution<> cube_distribution(0, max_size - 1); for (int i = 0; i < 10; i++) { - sparse_map test_map; + sparse_map test_map; std::map reference_map; for (int j = 0; j < 2 * max_size; j++) { size_t val = cube_distribution(random_engine);