Skip to content

Commit

Permalink
Improve PartitionPruner and trivial count opt
Browse files Browse the repository at this point in the history
  • Loading branch information
amosbird committed Mar 16, 2024
1 parent 6f49ffc commit b0fe7c2
Show file tree
Hide file tree
Showing 20 changed files with 352 additions and 230 deletions.
2 changes: 1 addition & 1 deletion src/Interpreters/InterpreterSelectQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
MergeTreeWhereOptimizer where_optimizer{
std::move(column_compressed_sizes),
metadata_snapshot,
storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context),
storage->getConditionEstimatorByPredicate(storage_snapshot, nullptr, context),
queried_columns,
supported_prewhere_columns,
log};
Expand Down
2 changes: 1 addition & 1 deletion src/Interpreters/Set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
/// Given left_lower >= left_point, right_lower >= right_point, find if there may be a match in between left_lower and right_lower.
if (left_lower + 1 < right_lower)
{
/// There is an point in between: left_lower + 1
/// There is a point in between: left_lower + 1
return {true, true};
}
else if (left_lower + 1 == right_lower)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
MergeTreeWhereOptimizer where_optimizer{
std::move(column_compressed_sizes),
storage_metadata,
storage.getConditionEstimatorByPredicate(source_step_with_filter->getQueryInfo(), storage_snapshot, context),
storage.getConditionEstimatorByPredicate(storage_snapshot, source_step_with_filter->getFilterActionsDAG(), context),
queried_columns,
storage.supportedPrewhereColumns(),
getLogger("QueryPlanOptimizePrewhere")};
Expand Down
39 changes: 7 additions & 32 deletions src/Processors/QueryPlan/ReadFromMergeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1275,8 +1275,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
}

ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions) const
MergeTreeData::DataPartsVector parts, std::vector<AlterConversionsPtr> alter_conversions, bool find_exact_ranges) const
{
return selectRangesToRead(
std::move(parts),
Expand All @@ -1289,7 +1288,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
data,
all_column_names,
log,
indexes);
indexes,
find_exact_ranges);
}

static void buildIndexes(
Expand Down Expand Up @@ -1475,34 +1475,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
const MergeTreeData & data,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes)
{
return selectRangesToReadImpl(
std::move(parts),
std::move(alter_conversions),
metadata_snapshot,
query_info_,
context_,
num_streams,
max_block_numbers_to_read,
data,
all_column_names,
log,
indexes);
}

ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions,
const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info_,
ContextPtr context_,
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes)
std::optional<Indexes> & indexes,
bool find_exact_ranges)
{
AnalysisResult result;
const auto & settings = context_->getSettingsRef();
Expand Down Expand Up @@ -1590,7 +1564,8 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
log,
num_streams,
result.index_stats,
indexes->use_skip_indexes);
indexes->use_skip_indexes,
find_exact_ranges);
}

size_t sum_marks_pk = total_marks_pk;
Expand Down
19 changes: 3 additions & 16 deletions src/Processors/QueryPlan/ReadFromMergeTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ class ReadFromMergeTree final : public SourceStepWithFilter
const MergeTreeData & data,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes);
std::optional<Indexes> & indexes,
bool find_exact_ranges);

AnalysisResultPtr selectRangesToRead(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions) const;
MergeTreeData::DataPartsVector parts, std::vector<AlterConversionsPtr> alter_conversions, bool find_exact_ranges = false) const;

StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; }

Expand Down Expand Up @@ -195,19 +195,6 @@ class ReadFromMergeTree final : public SourceStepWithFilter
void applyFilters(ActionDAGNodes added_filter_nodes) override;

private:
static AnalysisResultPtr selectRangesToReadImpl(
MergeTreeData::DataPartsVector parts,
std::vector<AlterConversionsPtr> alter_conversions,
const StorageMetadataPtr & metadata_snapshot,
const SelectQueryInfo & query_info,
ContextPtr context,
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes);

int getSortDirection() const
{
if (query_info.input_order_info)
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/IStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ StorageID IStorage::getStorageID() const
return storage_id;
}

ConditionEstimator IStorage::getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const
ConditionEstimator IStorage::getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const
{
return {};
}
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/IStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
/// Returns true if the storage supports queries with the PREWHERE section.
virtual bool supportsPrewhere() const { return false; }

virtual ConditionEstimator getConditionEstimatorByPredicate(const SelectQueryInfo &, const StorageSnapshotPtr &, ContextPtr) const;
virtual ConditionEstimator getConditionEstimatorByPredicate(const StorageSnapshotPtr &, const ActionsDAGPtr &, ContextPtr) const;

/// Returns which columns support PREWHERE, or std::nullopt if all columns are supported.
/// This is needed for engines that aggregate data from multiple tables, like Merge.
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/BoolMask.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "BoolMask.h"


/// BoolMask::can_be_X = true implies it will never change during BoolMask::combine.
const BoolMask BoolMask::consider_only_can_be_true(false, true);
const BoolMask BoolMask::consider_only_can_be_false(true, false);
58 changes: 40 additions & 18 deletions src/Storages/MergeTree/BoolMask.h
Original file line number Diff line number Diff line change
@@ -1,37 +1,59 @@
#pragma once

#include <fmt/format.h>

/// Multiple Boolean values. That is, two Boolean values: can it be true, can it be false.
struct BoolMask
{
bool can_be_true = false;
bool can_be_false = false;

BoolMask() = default;
BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {}
BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) { }

BoolMask operator &(const BoolMask & m) const
{
return {can_be_true && m.can_be_true, can_be_false || m.can_be_false};
}
BoolMask operator |(const BoolMask & m) const
{
return {can_be_true || m.can_be_true, can_be_false && m.can_be_false};
}
BoolMask operator !() const
BoolMask operator&(const BoolMask & m) const { return {can_be_true && m.can_be_true, can_be_false || m.can_be_false}; }
BoolMask operator|(const BoolMask & m) const { return {can_be_true || m.can_be_true, can_be_false && m.can_be_false}; }
BoolMask operator!() const { return {can_be_false, can_be_true}; }

bool operator==(const BoolMask & other) const { return can_be_true == other.can_be_true && can_be_false == other.can_be_false; }

/// Check if mask is no longer changeable under BoolMask::combine.
/// We use this condition to early-exit KeyCondition::checkInRange methods.
bool isComplete(const BoolMask & initial_mask) const
{
return {can_be_false, can_be_true};
if (initial_mask == consider_only_can_be_true)
return can_be_true;
else if (initial_mask == consider_only_can_be_false)
return can_be_false;
else
return can_be_true && can_be_false;
}

/// If mask is (true, true), then it can no longer change under operation |.
/// We use this condition to early-exit KeyConditions::check{InRange,After} methods.
bool isComplete() const
/// Combine check results from different hyperrectangles.
static BoolMask combine(const BoolMask & left, const BoolMask & right)
{
return can_be_false && can_be_true;
return {left.can_be_true || right.can_be_true, left.can_be_false || right.can_be_false};
}

/// These special constants are used to implement KeyCondition::mayBeTrue{InRange,After} via KeyCondition::check{InRange,After}.
/// When used as an initial_mask argument in KeyCondition::check{InRange,After} methods, they effectively prevent
/// calculation of discarded BoolMask component as it is already set to true.
/// The following two special constants are used to speed up
/// KeyCondition::checkInRange. When used as an initial_mask argument, they
/// effectively prevent calculation of discarded BoolMask component as it is
/// no longer changeable under BoolMask::combine (isComplete).
static const BoolMask consider_only_can_be_true;
static const BoolMask consider_only_can_be_false;
};

/// fmt integration for BoolMask, so a mask can be passed directly as an argument to fmt formatting calls.
/// The mask is rendered as "(can_be_true, can_be_false)".
namespace fmt
{
template <>
struct formatter<BoolMask>
{
    /// No custom format specifiers are parsed: the parse position is returned unchanged.
    static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); }

    /// Writes both components of `mask` to the output of `ctx` and returns the advanced output iterator.
    /// Declared const because the formatter holds no state and fmt invokes format() on a const
    /// formatter object in newer releases; a const member stays callable from older versions too.
    template <typename FormatContext>
    auto format(const BoolMask & mask, FormatContext & ctx) const
    {
        return fmt::format_to(ctx.out(), "({}, {})", mask.can_be_true, mask.can_be_false);
    }
};
}
Loading

0 comments on commit b0fe7c2

Please sign in to comment.