From 598e84eb3f6f77a7058ef68a26447984b545e3e4 Mon Sep 17 00:00:00 2001 From: KiterLuc <67824247+KiterLuc@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:56:56 +0100 Subject: [PATCH] Improve large dense aggregate reads with tile metadata only. (#4657) The dense reader was creating a bitmap to compute all aggregate results at the top level of the read. For large aggregate reads where we don't need to load any tiles, this could be quite large. It also turned out to be completely unnecessary. This fix moves the bitmap to the lower level of the read, where a smaller bitmap can be created only if necessary. --- TYPE: IMPROVEMENT DESC: Improve large dense aggregate reads with tile metadata only. --- tiledb/sm/query/readers/dense_reader.cc | 25 +++++++++++++------------ tiledb/sm/query/readers/dense_reader.h | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tiledb/sm/query/readers/dense_reader.cc b/tiledb/sm/query/readers/dense_reader.cc index df18d001bd9..501cdd5b4ad 100644 --- a/tiledb/sm/query/readers/dense_reader.cc +++ b/tiledb/sm/query/readers/dense_reader.cc @@ -1319,13 +1319,6 @@ Status DenseReader::process_aggregates( const auto& tile_coords = subarray.tile_coords(); const auto global_order = layout_ == Layout::GLOBAL_ORDER; - std::vector aggregate_bitmap; - if (condition_.has_value()) { - aggregate_bitmap = qc_result; - } else { - aggregate_bitmap.resize(subarray.cell_num(), 1); - } - // Process values in parallel. auto status = parallel_for_2d( storage_manager_->compute_tp(), @@ -1358,7 +1351,7 @@ Status DenseReader::process_aggregates( tile_subarrays[t], global_order ? 
tile_offsets[t] : 0, range_info, - aggregate_bitmap, + qc_result, range_thread_idx, num_range_threads)); } @@ -1863,7 +1856,7 @@ Status DenseReader::aggregate_tiles( const Subarray& tile_subarray, const uint64_t global_cell_offset, const std::vector>& range_info, - std::vector& aggregate_bitmap, + const std::vector& qc_result, const uint64_t range_thread_idx, const uint64_t num_range_threads) { // For easy reference @@ -1909,6 +1902,14 @@ Status DenseReader::aggregate_tiles( cell_offset = iter.dest_offset_row_col(); } + std::vector aggregate_bitmap(iter.cell_slab_length(), 1); + if (condition_.has_value()) { + memcpy( + aggregate_bitmap.data(), + qc_result.data() + cell_offset, + iter.cell_slab_length()); + } + // Iterate through all fragment domains and copy data. for (uint64_t fd = 0; fd < frag_domains.size(); fd++) { // If the cell slab overlaps this fragment domain range, copy data. @@ -1936,7 +1937,7 @@ Status DenseReader::aggregate_tiles( iter.pos_in_tile() + start, iter.pos_in_tile() + end + 1, tile_tuples[fd], - &aggregate_bitmap[cell_offset + start])}; + aggregate_bitmap.data() + start)}; for (auto& aggregate : aggregates) { aggregate->aggregate_data(aggregate_buffer); } @@ -1952,7 +1953,7 @@ Status DenseReader::aggregate_tiles( start_cell, start_cell + 1, tile_tuples[fd], - &aggregate_bitmap[cell_offset + start + i])}; + aggregate_bitmap.data() + start + i)}; for (auto& aggregate : aggregates) { aggregate->aggregate_data(aggregate_buffer); } @@ -1964,7 +1965,7 @@ Status DenseReader::aggregate_tiles( // fragments. 
if (fd != frag_domains.size() - 1) { for (uint64_t c = start; c <= end; c++) { - aggregate_bitmap[cell_offset + c] = 0; + aggregate_bitmap[c] = 0; } } diff --git a/tiledb/sm/query/readers/dense_reader.h b/tiledb/sm/query/readers/dense_reader.h index 8560171edec..16c997299ac 100644 --- a/tiledb/sm/query/readers/dense_reader.h +++ b/tiledb/sm/query/readers/dense_reader.h @@ -400,7 +400,7 @@ class DenseReader : public ReaderBase, public IQueryStrategy { const Subarray& tile_subarray, const uint64_t global_cell_offset, const std::vector>& range_info, - std::vector& aggregate_bitmap, + const std::vector& qc_result, const uint64_t range_thread_idx, const uint64_t num_range_threads);