Skip to content

Commit

Permalink
Use unordered layout for aggregate-only read queries on sparse arrays. (
Browse files Browse the repository at this point in the history
#5255)

[SC-53286](https://app.shortcut.com/tiledb-inc/story/53286/ignore-query-order-in-aggregate-only-queries)

If the user has not set a query layout, it will default to row-major,
which will use the legacy reader on sparse arrays, and fail if
aggregates were specified.

However, if only aggregates are specified and no regular data buffers,
the layout doesn't matter and we can transparently switch to the much
more efficient unordered layout.

---
TYPE: IMPROVEMENT
DESC: Fix read queries on sparse arrays where only aggregates are
requested and no layout is specified.
  • Loading branch information
teo-tsirpanis authored Sep 2, 2024
1 parent b239ff5 commit fd24448
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 12 deletions.
41 changes: 40 additions & 1 deletion test/src/test-cppapi-aggregates.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ void CppAggregatesFx<T>::generate_test_params() {
nullable_ = GENERATE(true, false);
allow_dups_ = GENERATE(true, false);
set_qc_values_ = {false};
layout_values_ = {TILEDB_UNORDERED};
layout_values_ = {TILEDB_ROW_MAJOR, TILEDB_UNORDERED};
use_dim_values_ = {true, false};
if (nullable_ || !std::is_same<T, uint64_t>::value) {
use_dim_values_ = {false};
Expand All @@ -195,6 +195,12 @@ void CppAggregatesFx<T>::run_all_combinations(std::function<void()> fn) {
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
fn();
}
Expand Down Expand Up @@ -1444,6 +1450,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -1824,6 +1836,11 @@ TEMPLATE_LIST_TEST_CASE(
for (bool set_qc : fx.set_qc_values_) {
fx.set_qc_ = set_qc;
for (tiledb_layout_t layout : fx.layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (!fx.dense_ && request_data && layout != TILEDB_UNORDERED)
continue;
fx.layout_ = layout;
Query query(fx.ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2048,6 +2065,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (request_data && layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2146,6 +2169,12 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (layout != TILEDB_UNORDERED) {
continue;
}
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2285,6 +2314,11 @@ TEST_CASE_METHOD(
for (bool set_qc : set_qc_values_) {
set_qc_ = set_qc;
for (tiledb_layout_t layout : layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (layout != TILEDB_UNORDERED)
continue;
layout_ = layout;
Query query(ctx_, array, TILEDB_READ);

Expand Down Expand Up @@ -2365,6 +2399,11 @@ TEMPLATE_LIST_TEST_CASE_METHOD(
for (bool set_qc : CppAggregatesFx<T>::set_qc_values_) {
CppAggregatesFx<T>::set_qc_ = set_qc;
for (tiledb_layout_t layout : CppAggregatesFx<T>::layout_values_) {
// Filter invalid combination. The legacy reader does not support
// aggregates, and we cannot automatically switch to unordered
// reads if we are requesting both the aggregates and the data.
if (!CppAggregatesFx<T>::dense_ && layout != TILEDB_UNORDERED)
continue;
CppAggregatesFx<T>::layout_ = layout;
Query query(CppAggregatesFx<T>::ctx_, array, TILEDB_READ);

Expand Down
34 changes: 24 additions & 10 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1762,7 +1762,21 @@ bool Query::is_aggregate(std::string output_field_name) const {
/* PRIVATE METHODS */
/* ****************************** */

Layout Query::effective_layout() const {
// If the user has not set a layout, it will default to row-major, which will
// use the legacy reader on sparse arrays, and fail if aggregates were
// specified. However, if only aggregates are specified and no regular data
// buffers, the layout doesn't matter and we can transparently switch to the
// much more efficient unordered layout.
if (type_ == QueryType::READ && !array_schema_->dense() && has_aggregates() &&
buffers_.empty()) {
return Layout::UNORDERED;
}
return layout_;
}

Status Query::create_strategy(bool skip_checks_serialization) {
auto layout = effective_layout();
auto params = StrategyParams(
resources_,
array_->memory_tracker(),
Expand All @@ -1775,16 +1789,16 @@ Status Query::create_strategy(bool skip_checks_serialization) {
buffers_,
aggregate_buffers_,
subarray_,
layout_,
layout,
condition_,
default_channel_aggregates_,
skip_checks_serialization);
if (type_ == QueryType::WRITE || type_ == QueryType::MODIFY_EXCLUSIVE) {
if (layout_ == Layout::COL_MAJOR || layout_ == Layout::ROW_MAJOR) {
if (layout == Layout::COL_MAJOR || layout == Layout::ROW_MAJOR) {
if (!array_schema_->dense()) {
return Status_QueryError(
"Cannot create strategy; sparse writes do not support layout " +
layout_str(layout_));
layout_str(layout));
}
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
OrderedWriter,
Expand All @@ -1795,11 +1809,11 @@ Status Query::create_strategy(bool skip_checks_serialization) {
coords_info_,
remote_query_,
fragment_name_));
} else if (layout_ == Layout::UNORDERED) {
} else if (layout == Layout::UNORDERED) {
if (array_schema_->dense()) {
return Status_QueryError(
"Cannot create strategy; dense writes do not support layout " +
layout_str(layout_));
layout_str(layout));
}
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
UnorderedWriter,
Expand All @@ -1811,7 +1825,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
written_buffers_,
remote_query_,
fragment_name_));
} else if (layout_ == Layout::GLOBAL_ORDER) {
} else if (layout == Layout::GLOBAL_ORDER) {
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
GlobalOrderWriter,
stats_->create_child("Writer"),
Expand All @@ -1826,7 +1840,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
fragment_name_));
} else {
return Status_QueryError(
"Cannot create strategy; unsupported layout " + layout_str(layout_));
"Cannot create strategy; unsupported layout " + layout_str(layout));
}
} else if (type_ == QueryType::READ) {
bool all_dense = true;
Expand Down Expand Up @@ -1854,7 +1868,7 @@ Status Query::create_strategy(bool skip_checks_serialization) {
params,
dimension_label_increasing_));
} else if (use_refactored_sparse_unordered_with_dups_reader(
layout_, *array_schema_)) {
layout, *array_schema_)) {
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
subarray_.range_num() == 1) {
strategy_ = tdb_unique_ptr<IQueryStrategy>(tdb_new(
Expand All @@ -1870,9 +1884,9 @@ Status Query::create_strategy(bool skip_checks_serialization) {
params));
}
} else if (
use_refactored_sparse_global_order_reader(layout_, *array_schema_) &&
use_refactored_sparse_global_order_reader(layout, *array_schema_) &&
!array_schema_->dense() &&
(layout_ == Layout::GLOBAL_ORDER || layout_ == Layout::UNORDERED)) {
(layout == Layout::GLOBAL_ORDER || layout == Layout::UNORDERED)) {
// Using the reader for unordered queries to do deduplication.
if (Query::non_overlapping_ranges() || !subarray_.is_set() ||
subarray_.range_num() == 1) {
Expand Down
11 changes: 10 additions & 1 deletion tiledb/sm/query/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ class Query {
/**
* Returns true if the query has any aggregates on any channels
*/
bool has_aggregates() {
bool has_aggregates() const {
// We only need to check the default channel for now
return !default_channel_aggregates_.empty();
}
Expand Down Expand Up @@ -1149,6 +1149,15 @@ class Query {
/* PRIVATE METHODS */
/* ********************************* */

/**
* Return the layout that will be used to execute the query.
*
* This is usually set by the user but can be overridden by TileDB in cases
* where the data order would not have an observable difference, like queries
* with only aggregates.
*/
Layout effective_layout() const;

/**
* Create the strategy.
*
Expand Down

0 comments on commit fd24448

Please sign in to comment.