From f7761296a868a23709aa39c1c7aefb1827cfab85 Mon Sep 17 00:00:00 2001 From: tzaeschke Date: Mon, 19 Dec 2022 18:35:50 +0100 Subject: [PATCH 1/3] initial --- benchmark/count_mm_d_benchmark.cc | 12 ++- benchmark/find_benchmark.cc | 111 ++++++++--------------- benchmark/query_benchmark.cc | 11 +-- benchmark/query_box_d_benchmark.cc | 10 +- benchmark/query_d_benchmark.cc | 11 +-- benchmark/query_mm_box_d_benchmark.cc | 12 ++- benchmark/query_mm_d_benchmark.cc | 64 +++++-------- benchmark/query_mm_d_filter_benchmark.cc | 8 +- include/phtree/common/flat_array_map.h | 37 ++++---- 9 files changed, 112 insertions(+), 164 deletions(-) diff --git a/benchmark/count_mm_d_benchmark.cc b/benchmark/count_mm_d_benchmark.cc index b05987bd..a2840cb7 100644 --- a/benchmark/count_mm_d_benchmark.cc +++ b/benchmark/count_mm_d_benchmark.cc @@ -67,7 +67,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -165,11 +165,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - query.box.min()[d] = s - radius; - query.box.max()[d] = s + radius; + auto s = shift * cube_distribution_(random_engine_); + query.box.min()[d] = s; + query.box.max()[d] = s + length; } } diff --git a/benchmark/find_benchmark.cc b/benchmark/find_benchmark.cc index c7abe529..132a3236 100644 --- a/benchmark/find_benchmark.cc +++ b/benchmark/find_benchmark.cc @@ -35,18 +35,17 @@ enum QueryType { /* * Benchmark for looking up entries by their key. */ -template +template class IndexBenchmark { public: - IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type); + IndexBenchmark(benchmark::State& state, double dummy); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - int QueryWorldCount(benchmark::State& state); - int QueryWorldFind(benchmark::State& state); + int QueryWorldCount(); + int QueryWorldFind(); const TestGenerator data_type_; const size_t num_entities_; @@ -58,34 +57,33 @@ class IndexBenchmark { std::vector> points_; }; -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type) -: data_type_{data_type} -, num_entities_(num_entities) -, query_type_(query_type) +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, query_type_(QUERY_TYPE) , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities) { +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { int num_inner = 0; int num_found = 0; switch (query_type_) { case COUNT: { for (auto _ : state) { - num_found += QueryWorldCount(state); + num_found += QueryWorldCount(); ++num_inner; } break; } case FIND: { for (auto _ : state) { - num_found += QueryWorldFind(state); + num_found += QueryWorldFind(); ++num_inner; } break; @@ -98,8 +96,8 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { state.counters["avg_result_count"] += num_found; } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (size_t i = 0; i < num_entities_; ++i) { @@ -110,27 +108,27 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - logging::info("World setup complete."); } -template -int IndexBenchmark::QueryWorldCount(benchmark::State&) { +template +int IndexBenchmark::QueryWorldCount() { static int pos = 0; pos = (pos + 1) % num_entities_; - bool found = true; + bool found; if (pos % 2 == 0) { - assert(tree_.find(points_.at(pos)) != tree_.end()); + found = tree_.count(points_.at(pos)); + assert(found); } else { int x = pos % GLOBAL_MAX; PhPoint p = PhPoint({x, x, x}); - found = tree_.find(p) != tree_.end(); + found = tree_.count(p); } return found; } -template -int IndexBenchmark::QueryWorldFind(benchmark::State&) { +template +int IndexBenchmark::QueryWorldFind() { static int pos = 0; pos = (pos + 1) % num_entities_; bool found; @@ -150,61 +148,26 @@ int IndexBenchmark::QueryWorldFind(benchmark::State&) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3DCount(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, QueryType::COUNT> benchmark{state, arguments...}; benchmark.Benchmark(state); } -// index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1K, TestGenerator::CUBE, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_10K, TestGenerator::CUBE, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_100K, TestGenerator::CUBE, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1M, TestGenerator::CUBE, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1K, TestGenerator::CUBE, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_10K, TestGenerator::CUBE, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_100K, TestGenerator::CUBE, 100000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1M, TestGenerator::CUBE, 1000000, FIND) - ->Unit(benchmark::kMillisecond); +template +void PhTree3DFind(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, QueryType::FIND> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} // index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1K, TestGenerator::CLUSTER, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_10K, TestGenerator::CLUSTER, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_100K, TestGenerator::CLUSTER, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1M, TestGenerator::CLUSTER, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1K, TestGenerator::CLUSTER, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_10K, TestGenerator::CLUSTER, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_100K, TestGenerator::CLUSTER, 100000, FIND) +BENCHMARK_CAPTURE(PhTree3DCount, COUNT, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1M, TestGenerator::CLUSTER, 1000000, FIND) +BENCHMARK_CAPTURE(PhTree3DFind, FIND, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/benchmark/query_benchmark.cc b/benchmark/query_benchmark.cc index dd24386b..5fa89813 100644 --- a/benchmark/query_benchmark.cc +++ b/benchmark/query_benchmark.cc @@ -46,7 +46,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -110,12 +110,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, PhBox& query_ template void IndexBenchmark::CreateQuery(PhBox& query_box) { - int length = query_endge_length(); - // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - scalar_64_t s = cube_distribution_(random_engine_); - s = (scalar_64_t)(s * scale); + scalar_64_t s = (scalar_64_t)(shift * cube_distribution_(random_engine_)); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_box_d_benchmark.cc b/benchmark/query_box_d_benchmark.cc index 5f8a6682..e88bf00f 100644 --- a/benchmark/query_box_d_benchmark.cc +++ b/benchmark/query_box_d_benchmark.cc @@ -58,7 +58,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -153,12 +153,12 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); + int length = query_edge_length(); // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + auto s = shift * cube_distribution_(random_engine_); + s = s * shift; query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_d_benchmark.cc b/benchmark/query_d_benchmark.cc index b487b1ca..2c6ad3ec 100644 --- a/benchmark/query_d_benchmark.cc +++ b/benchmark/query_d_benchmark.cc @@ -57,7 +57,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -152,12 +152,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); - // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + auto s = shift * cube_distribution_(random_engine_); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_mm_box_d_benchmark.cc b/benchmark/query_mm_box_d_benchmark.cc index 9f42cccb..51dd6c72 100644 --- a/benchmark/query_mm_box_d_benchmark.cc +++ b/benchmark/query_mm_box_d_benchmark.cc @@ -68,7 +68,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -184,11 +184,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box_.min()[d] = x - radius; - query.box_.max()[d] = x + radius; + auto x = shift * cube_distribution_(random_engine_); + query.box_.min()[d] = x; + query.box_.max()[d] = x + length; } } diff --git a/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc index 24a2de5d..ab0b4054 100644 --- a/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -41,8 +41,6 @@ using BucketType = std::set; struct Query { QueryBox box{}; - TestPoint center{}; - double radius{}; }; template @@ -73,7 +71,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -131,53 +129,41 @@ void InsertEntry( tree.emplace(point, data); } -int CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { - const auto& point = entity; - bool dx = abs(center[0] - point[0]) <= radius; - bool dy = abs(center[1] - point[1]) <= radius; - bool dz = abs(center[2] - point[2]) <= radius; - return dx && dy && dz ? 1 : -100000000; -} - struct CounterTreeWithMap { void operator()(const PhPointD<3>&, const BucketType& value) { for (auto& x : value) { - // n_ += (x.entity_id_ >= 0); - n_ += CheckPosition(x, center_, radius_); + (void)x; + ++n_; } } - const TestPoint& center_; - double radius_; size_t n_; }; struct CounterMultiMap { - void operator()(const PhPointD<3>&, const payload_t& value) { - n_ += CheckPosition(value, center_, radius_); + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; } - const TestPoint& center_; - double radius_; size_t n_; }; template typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { - CounterTreeWithMap counter{query.center, query.radius, 0}; + CounterTreeWithMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } template size_t CountEntries(TestMap& tree, const Query& query) { - CounterMultiMap counter{query.center, query.radius, 0}; + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } template size_t CountEntries(TestMap& tree, const Query& query) { - CounterMultiMap counter{query.center, query.radius, 0}; + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } @@ -208,24 +194,18 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box.min()[d] = x - radius; - query.box.max()[d] = x + radius; - query.center[d] = x; + auto x = shift * cube_distribution_(random_engine_); + query.box.min()[d] = x; + query.box.max()[d] = x + length; } - query.radius = radius; } } // namespace -template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; - benchmark.Benchmark(state); -} - template void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; @@ -238,21 +218,27 @@ void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... arguments) { benchmark.Benchmark(state); } +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + // index type, scenario name, data_type, num_entities, avg_query_result_size -// PhTree -BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) +BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -// PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) +// PhTree with manual BUCKET management +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/benchmark/query_mm_d_filter_benchmark.cc b/benchmark/query_mm_d_filter_benchmark.cc index 4cfbbdf8..28939d08 100644 --- a/benchmark/query_mm_d_filter_benchmark.cc +++ b/benchmark/query_mm_d_filter_benchmark.cc @@ -141,7 +141,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -279,9 +279,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double radius = query_edge_length() * 0.5; + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)radius * 2) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); + auto x = shift * cube_distribution_(random_engine_); query.box.min()[d] = x - radius; query.box.max()[d] = x + radius; query.center[d] = x; diff --git a/include/phtree/common/flat_array_map.h b/include/phtree/common/flat_array_map.h index 8db59a89..9b59a7cd 100644 --- a/include/phtree/common/flat_array_map.h +++ b/include/phtree/common/flat_array_map.h @@ -101,29 +101,19 @@ class flat_array_map { public: [[nodiscard]] auto find(size_t index) noexcept { - return occupied(index) ? iterator{index, this} : end(); + return iterator{occupied(index) ? index : SIZE, this}; } [[nodiscard]] auto lower_bound(size_t index) const { - size_t index2 = lower_bound_index(index); - if (index2 < SIZE) { - return iterator{index2, this}; - } - return end(); + return iterator{lower_bound_index(index), this}; } [[nodiscard]] auto begin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return iterator{index < SIZE ? index : SIZE, this}; + return iterator{lower_bound_index(0), this}; } [[nodiscard]] auto cbegin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return iterator{index < SIZE ? index : SIZE, this}; + return iterator{lower_bound_index(0), this}; } [[nodiscard]] auto end() const { @@ -151,7 +141,7 @@ class flat_array_map { std::piecewise_construct, std::forward_as_tuple(index), std::forward_as_tuple(std::forward(args)...)); - occupied(index, true); + occupy(index); return {&data(index), true}; } return {&data(index), false}; @@ -160,7 +150,7 @@ class flat_array_map { bool erase(size_t index) { if (occupied(index)) { data(index).~pair(); - occupied(index, false); + unoccupy(index); return true; } return false; @@ -191,17 +181,22 @@ class flat_array_map { return std::min(SIZE, index + num_zeros); } - void occupied(size_t index, bool flag) { - (void)flag; + void occupy(size_t index) { assert(index < SIZE); - assert(occupied(index) != flag); + assert(!occupied(index)); + // flip the bit + occupancy ^= (1ul << index); + } + + void unoccupy(size_t index) { + assert(index < SIZE); + assert(occupied(index)); // flip the bit occupancy ^= (1ul << index); - assert(occupied(index) == flag); } [[nodiscard]] bool occupied(size_t index) const { - return (occupancy >> index) & 1ul; + return (occupancy >> index) & 1; } std::uint64_t occupancy = 0; From e1a66e86be502cef46995d427e4404fec41b76b5 Mon Sep 17 00:00:00 2001 From: tzaeschke Date: Mon, 19 Dec 2022 18:35:50 +0100 Subject: [PATCH 2/3] initial --- benchmark/count_mm_d_benchmark.cc | 12 ++- benchmark/find_benchmark.cc | 111 ++++++++--------------- benchmark/query_benchmark.cc | 11 +-- benchmark/query_box_d_benchmark.cc | 10 +- benchmark/query_d_benchmark.cc | 11 +-- benchmark/query_mm_box_d_benchmark.cc | 12 ++- benchmark/query_mm_d_benchmark.cc | 64 +++++-------- benchmark/query_mm_d_filter_benchmark.cc | 8 +- include/phtree/common/flat_array_map.h | 37 ++++---- 9 files changed, 112 insertions(+), 164 deletions(-) diff --git a/benchmark/count_mm_d_benchmark.cc b/benchmark/count_mm_d_benchmark.cc index b05987bd..a2840cb7 100644 --- a/benchmark/count_mm_d_benchmark.cc +++ b/benchmark/count_mm_d_benchmark.cc @@ -67,7 +67,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -165,11 +165,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - query.box.min()[d] = s - radius; - query.box.max()[d] = s + radius; + auto s = shift * cube_distribution_(random_engine_); + query.box.min()[d] = s; + query.box.max()[d] = s + length; } } diff --git a/benchmark/find_benchmark.cc b/benchmark/find_benchmark.cc index c7abe529..132a3236 100644 --- a/benchmark/find_benchmark.cc +++ b/benchmark/find_benchmark.cc @@ -35,18 +35,17 @@ enum QueryType { /* * Benchmark for looking up entries by their key. */ -template +template class IndexBenchmark { public: - IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type); + IndexBenchmark(benchmark::State& state, double dummy); void Benchmark(benchmark::State& state); private: void SetupWorld(benchmark::State& state); - int QueryWorldCount(benchmark::State& state); - int QueryWorldFind(benchmark::State& state); + int QueryWorldCount(); + int QueryWorldFind(); const TestGenerator data_type_; const size_t num_entities_; @@ -58,34 +57,33 @@ class IndexBenchmark { std::vector> points_; }; -template -IndexBenchmark::IndexBenchmark( - benchmark::State& state, TestGenerator data_type, int num_entities, QueryType query_type) -: data_type_{data_type} -, num_entities_(num_entities) -, query_type_(query_type) +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, double) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, query_type_(QUERY_TYPE) , random_engine_{1} , cube_distribution_{0, GLOBAL_MAX} -, points_(num_entities) { +, points_(state.range(0)) { logging::SetupDefaultLogging(); SetupWorld(state); } -template -void IndexBenchmark::Benchmark(benchmark::State& state) { +template +void IndexBenchmark::Benchmark(benchmark::State& state) { int num_inner = 0; int num_found = 0; switch (query_type_) { case COUNT: { for (auto _ : state) { - num_found += QueryWorldCount(state); + num_found += QueryWorldCount(); ++num_inner; } break; } case FIND: { for (auto _ : state) { - num_found += QueryWorldFind(state); + num_found += QueryWorldFind(); ++num_inner; } break; @@ -98,8 +96,8 @@ void IndexBenchmark::Benchmark(benchmark::State& state) { state.counters["avg_result_count"] += num_found; } -template -void IndexBenchmark::SetupWorld(benchmark::State& state) { +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); for (size_t i = 0; i < num_entities_; ++i) { @@ -110,27 +108,27 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); - logging::info("World setup complete."); } -template -int IndexBenchmark::QueryWorldCount(benchmark::State&) { +template +int IndexBenchmark::QueryWorldCount() { static int pos = 0; pos = (pos + 1) % num_entities_; - bool found = true; + bool found; if (pos % 2 == 0) { - assert(tree_.find(points_.at(pos)) != tree_.end()); + found = tree_.count(points_.at(pos)); + assert(found); } else { int x = pos % GLOBAL_MAX; PhPoint p = PhPoint({x, x, x}); - found = tree_.find(p) != tree_.end(); + found = tree_.count(p); } return found; } -template -int IndexBenchmark::QueryWorldFind(benchmark::State&) { +template +int IndexBenchmark::QueryWorldFind() { static int pos = 0; pos = (pos + 1) % num_entities_; bool found; @@ -150,61 +148,26 @@ int IndexBenchmark::QueryWorldFind(benchmark::State&) { } // namespace template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3> benchmark{state, arguments...}; +void PhTree3DCount(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, QueryType::COUNT> benchmark{state, arguments...}; benchmark.Benchmark(state); } -// index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CUBE -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1K, TestGenerator::CUBE, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_10K, TestGenerator::CUBE, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_100K, TestGenerator::CUBE, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CU_1M, TestGenerator::CUBE, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1K, TestGenerator::CUBE, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_10K, TestGenerator::CUBE, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_100K, TestGenerator::CUBE, 100000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CU_1M, TestGenerator::CUBE, 1000000, FIND) - ->Unit(benchmark::kMillisecond); +template +void PhTree3DFind(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, QueryType::FIND> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} // index type, scenario name, data_generator, num_entities, function_to_call -// PhTree 3D CLUSTER -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1K, TestGenerator::CLUSTER, 1000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_10K, TestGenerator::CLUSTER, 10000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_100K, TestGenerator::CLUSTER, 100000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, COUNT_CL_1M, TestGenerator::CLUSTER, 1000000, COUNT) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1K, TestGenerator::CLUSTER, 1000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_10K, TestGenerator::CLUSTER, 10000, FIND) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_100K, TestGenerator::CLUSTER, 100000, FIND) +BENCHMARK_CAPTURE(PhTree3DCount, COUNT, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -BENCHMARK_CAPTURE(PhTree3D, FIND_CL_1M, TestGenerator::CLUSTER, 1000000, FIND) +BENCHMARK_CAPTURE(PhTree3DFind, FIND, 0.0) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/benchmark/query_benchmark.cc b/benchmark/query_benchmark.cc index dd24386b..5fa89813 100644 --- a/benchmark/query_benchmark.cc +++ b/benchmark/query_benchmark.cc @@ -46,7 +46,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -110,12 +110,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, PhBox& query_ template void IndexBenchmark::CreateQuery(PhBox& query_box) { - int length = query_endge_length(); - // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - scalar_64_t s = cube_distribution_(random_engine_); - s = (scalar_64_t)(s * scale); + scalar_64_t s = (scalar_64_t)(shift * cube_distribution_(random_engine_)); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_box_d_benchmark.cc b/benchmark/query_box_d_benchmark.cc index 5f8a6682..e88bf00f 100644 --- a/benchmark/query_box_d_benchmark.cc +++ b/benchmark/query_box_d_benchmark.cc @@ -58,7 +58,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -153,12 +153,12 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); + int length = query_edge_length(); // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + auto s = shift * cube_distribution_(random_engine_); + s = s * shift; query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_d_benchmark.cc b/benchmark/query_d_benchmark.cc index b487b1ca..2c6ad3ec 100644 --- a/benchmark/query_d_benchmark.cc +++ b/benchmark/query_d_benchmark.cc @@ -57,7 +57,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr int query_endge_length() { + constexpr int query_edge_length() { return ( int)(GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM)); }; @@ -152,12 +152,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, BoxTyp template void IndexBenchmark::CreateQuery(BoxType& query_box) { - int length = query_endge_length(); - // scale to ensure query lies within boundary - double scale = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; + int length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto s = cube_distribution_(random_engine_); - s = s * scale; + auto s = shift * cube_distribution_(random_engine_); query_box.min()[d] = s; query_box.max()[d] = s + length; } diff --git a/benchmark/query_mm_box_d_benchmark.cc b/benchmark/query_mm_box_d_benchmark.cc index 9f42cccb..51dd6c72 100644 --- a/benchmark/query_mm_box_d_benchmark.cc +++ b/benchmark/query_mm_box_d_benchmark.cc @@ -68,7 +68,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -184,11 +184,13 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box_.min()[d] = x - radius; - query.box_.max()[d] = x + radius; + auto x = shift * cube_distribution_(random_engine_); + query.box_.min()[d] = x; + query.box_.max()[d] = x + length; } } diff --git a/benchmark/query_mm_d_benchmark.cc b/benchmark/query_mm_d_benchmark.cc index 24a2de5d..ab0b4054 100644 --- a/benchmark/query_mm_d_benchmark.cc +++ b/benchmark/query_mm_d_benchmark.cc @@ -41,8 +41,6 @@ using BucketType = std::set; struct Query { QueryBox box{}; - TestPoint center{}; - double radius{}; }; template @@ -73,7 +71,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -131,53 +129,41 @@ void InsertEntry( tree.emplace(point, data); } -int CheckPosition(const payload_t& entity, const TestPoint& center, double radius) { - const auto& point = entity; - bool dx = abs(center[0] - point[0]) <= radius; - bool dy = abs(center[1] - point[1]) <= radius; - bool dz = abs(center[2] - point[2]) <= radius; - return dx && dy && dz ? 1 : -100000000; -} - struct CounterTreeWithMap { void operator()(const PhPointD<3>&, const BucketType& value) { for (auto& x : value) { - // n_ += (x.entity_id_ >= 0); - n_ += CheckPosition(x, center_, radius_); + (void)x; + ++n_; } } - const TestPoint& center_; - double radius_; size_t n_; }; struct CounterMultiMap { - void operator()(const PhPointD<3>&, const payload_t& value) { - n_ += CheckPosition(value, center_, radius_); + void operator()(const PhPointD<3>&, const payload_t&) { + ++n_; } - const TestPoint& center_; - double radius_; size_t n_; }; template typename std::enable_if::type CountEntries( TestMap& tree, const Query& query) { - CounterTreeWithMap counter{query.center, query.radius, 0}; + CounterTreeWithMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } template size_t CountEntries(TestMap& tree, const Query& query) { - CounterMultiMap counter{query.center, query.radius, 0}; + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } template size_t CountEntries(TestMap& tree, const Query& query) { - CounterMultiMap counter{query.center, query.radius, 0}; + CounterMultiMap counter{0}; tree.for_each(query.box, counter); return counter.n_; } @@ -208,24 +194,18 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double length = query_edge_length(); + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)length) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); - query.box.min()[d] = x - radius; - query.box.max()[d] = x + radius; - query.center[d] = x; + auto x = shift * cube_distribution_(random_engine_); + query.box.min()[d] = x; + query.box.max()[d] = x + length; } - query.radius = radius; } } // namespace -template -void PhTree3D(benchmark::State& state, Arguments&&... arguments) { - IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; - benchmark.Benchmark(state); -} - template void PhTreeMultiMap3D(benchmark::State& state, Arguments&&... arguments) { IndexBenchmark<3, Scenario::MULTI_MAP> benchmark{state, arguments...}; @@ -238,21 +218,27 @@ void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... arguments) { benchmark.Benchmark(state); } +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + // index type, scenario name, data_type, num_entities, avg_query_result_size -// PhTree -BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) +// PhTreeMultiMap +BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); // PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0) +BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); -// PhTreeMultiMap -BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0) +// PhTree with manual BUCKET management +BENCHMARK_CAPTURE(PhTree3D, WQ_100, 100.0) ->RangeMultiplier(10) ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) ->Unit(benchmark::kMillisecond); diff --git a/benchmark/query_mm_d_filter_benchmark.cc b/benchmark/query_mm_d_filter_benchmark.cc index 4cfbbdf8..28939d08 100644 --- a/benchmark/query_mm_d_filter_benchmark.cc +++ b/benchmark/query_mm_d_filter_benchmark.cc @@ -141,7 +141,7 @@ class IndexBenchmark { const size_t num_entities_; const double avg_query_result_size_; - constexpr double query_endge_length() { + constexpr double query_edge_length() { return GLOBAL_MAX * pow(avg_query_result_size_ / (double)num_entities_, 1. / (double)DIM); }; @@ -279,9 +279,11 @@ void IndexBenchmark::QueryWorld(benchmark::State& state, const Qu template void IndexBenchmark::CreateQuery(Query& query) { - double radius = query_endge_length() * 0.5; + double radius = query_edge_length() * 0.5; + // shift to ensure query lies within boundary + double shift = (GLOBAL_MAX - (double)radius * 2) / GLOBAL_MAX; for (dimension_t d = 0; d < DIM; ++d) { - auto x = cube_distribution_(random_engine_); + auto x = shift * cube_distribution_(random_engine_); query.box.min()[d] = x - radius; query.box.max()[d] = x + radius; query.center[d] = x; diff --git a/include/phtree/common/flat_array_map.h b/include/phtree/common/flat_array_map.h index 8db59a89..9b59a7cd 100644 --- a/include/phtree/common/flat_array_map.h +++ b/include/phtree/common/flat_array_map.h @@ -101,29 +101,19 @@ class flat_array_map { public: [[nodiscard]] auto find(size_t index) noexcept { - return occupied(index) ? iterator{index, this} : end(); + return iterator{occupied(index) ? index : SIZE, this}; } [[nodiscard]] auto lower_bound(size_t index) const { - size_t index2 = lower_bound_index(index); - if (index2 < SIZE) { - return iterator{index2, this}; - } - return end(); + return iterator{lower_bound_index(index), this}; } [[nodiscard]] auto begin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return iterator{index < SIZE ? index : SIZE, this}; + return iterator{lower_bound_index(0), this}; } [[nodiscard]] auto cbegin() const { - size_t index = CountTrailingZeros(occupancy); - // Assert index points to a valid position or outside the map if the map is empty - assert((size() == 0 && index >= SIZE) || occupied(index)); - return iterator{index < SIZE ? index : SIZE, this}; + return iterator{lower_bound_index(0), this}; } [[nodiscard]] auto end() const { @@ -151,7 +141,7 @@ class flat_array_map { std::piecewise_construct, std::forward_as_tuple(index), std::forward_as_tuple(std::forward(args)...)); - occupied(index, true); + occupy(index); return {&data(index), true}; } return {&data(index), false}; @@ -160,7 +150,7 @@ class flat_array_map { bool erase(size_t index) { if (occupied(index)) { data(index).~pair(); - occupied(index, false); + unoccupy(index); return true; } return false; @@ -191,17 +181,22 @@ class flat_array_map { return std::min(SIZE, index + num_zeros); } - void occupied(size_t index, bool flag) { - (void)flag; + void occupy(size_t index) { assert(index < SIZE); - assert(occupied(index) != flag); + assert(!occupied(index)); + // flip the bit + occupancy ^= (1ul << index); + } + + void unoccupy(size_t index) { + assert(index < SIZE); + assert(occupied(index)); // flip the bit occupancy ^= (1ul << index); - assert(occupied(index) == flag); } [[nodiscard]] bool occupied(size_t index) const { - return (occupancy >> index) & 1ul; + return (occupancy >> index) & 1; } std::uint64_t occupancy = 0; From 283944b14713c86b8e738de3fce38f91c9d94f23 Mon Sep 17 00:00:00 2001 From: tzaeschke Date: Mon, 19 Dec 2022 19:07:28 +0100 Subject: [PATCH 3/3] initial --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eca7e57a..52b03b39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added B+tree multimap for internal (future) use. [#93](https://github.com/tzaeschke/phtree-cpp/issues/93) ### Changed +- Clean up array_map. [#107](https://github.com/tzaeschke/phtree-cpp/issues/107), - Fixed compatibility with bazel 6.0.0. [#109](https://github.com/tzaeschke/phtree-cpp/issues/109), - Added missing compiler flag for TZCNT/CTZ (count trailing zeros). This should be much faster on haswell or later CPUs. [#103](https://github.com/tzaeschke/phtree-cpp/issues/103),