Skip to content

Commit

Permalink
Improve large dense aggregate reads with tile metadata only. (#4657)
Browse files Browse the repository at this point in the history
The dense reader was creating a bitmap to compute all aggregate results
at the top level of the read. For large aggregate reads where we don't
need to load any tiles, this bitmap could be quite large, and it would
also turn out to be completely unnecessary. This fix moves the bitmap to
the lower level of the read, where a smaller bitmap can be created only
if necessary.

---
TYPE: IMPROVEMENT
DESC: Improve large dense aggregate reads with tile metadata only.
  • Loading branch information
KiterLuc authored Jan 23, 2024
1 parent 8b6501e commit 598e84e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 13 deletions.
25 changes: 13 additions & 12 deletions tiledb/sm/query/readers/dense_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1319,13 +1319,6 @@ Status DenseReader::process_aggregates(
const auto& tile_coords = subarray.tile_coords();
const auto global_order = layout_ == Layout::GLOBAL_ORDER;

std::vector<uint8_t> aggregate_bitmap;
if (condition_.has_value()) {
aggregate_bitmap = qc_result;
} else {
aggregate_bitmap.resize(subarray.cell_num(), 1);
}

// Process values in parallel.
auto status = parallel_for_2d(
storage_manager_->compute_tp(),
Expand Down Expand Up @@ -1358,7 +1351,7 @@ Status DenseReader::process_aggregates(
tile_subarrays[t],
global_order ? tile_offsets[t] : 0,
range_info,
aggregate_bitmap,
qc_result,
range_thread_idx,
num_range_threads));
}
Expand Down Expand Up @@ -1863,7 +1856,7 @@ Status DenseReader::aggregate_tiles(
const Subarray& tile_subarray,
const uint64_t global_cell_offset,
const std::vector<RangeInfo<DimType>>& range_info,
std::vector<uint8_t>& aggregate_bitmap,
const std::vector<uint8_t>& qc_result,
const uint64_t range_thread_idx,
const uint64_t num_range_threads) {
// For easy reference
Expand Down Expand Up @@ -1909,6 +1902,14 @@ Status DenseReader::aggregate_tiles(
cell_offset = iter.dest_offset_row_col();
}

std::vector<uint8_t> aggregate_bitmap(iter.cell_slab_length(), 1);
if (condition_.has_value()) {
memcpy(
aggregate_bitmap.data(),
qc_result.data() + cell_offset,
iter.cell_slab_length());
}

// Iterate through all fragment domains and copy data.
for (uint64_t fd = 0; fd < frag_domains.size(); fd++) {
// If the cell slab overlaps this fragment domain range, copy data.
Expand Down Expand Up @@ -1936,7 +1937,7 @@ Status DenseReader::aggregate_tiles(
iter.pos_in_tile() + start,
iter.pos_in_tile() + end + 1,
tile_tuples[fd],
&aggregate_bitmap[cell_offset + start])};
aggregate_bitmap.data() + start)};
for (auto& aggregate : aggregates) {
aggregate->aggregate_data(aggregate_buffer);
}
Expand All @@ -1952,7 +1953,7 @@ Status DenseReader::aggregate_tiles(
start_cell,
start_cell + 1,
tile_tuples[fd],
&aggregate_bitmap[cell_offset + start + i])};
aggregate_bitmap.data() + start + i)};
for (auto& aggregate : aggregates) {
aggregate->aggregate_data(aggregate_buffer);
}
Expand All @@ -1964,7 +1965,7 @@ Status DenseReader::aggregate_tiles(
// fragments.
if (fd != frag_domains.size() - 1) {
for (uint64_t c = start; c <= end; c++) {
aggregate_bitmap[cell_offset + c] = 0;
aggregate_bitmap[c] = 0;
}
}

Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/query/readers/dense_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ class DenseReader : public ReaderBase, public IQueryStrategy {
const Subarray& tile_subarray,
const uint64_t global_cell_offset,
const std::vector<RangeInfo<DimType>>& range_info,
std::vector<uint8_t>& aggregate_bitmap,
const std::vector<uint8_t>& qc_result,
const uint64_t range_thread_idx,
const uint64_t num_range_threads);

Expand Down

0 comments on commit 598e84e

Please sign in to comment.