Skip to content

Commit

Permalink
Merge pull request ClickHouse#60570 from ClickHouse/revert-59595-vdim…
Browse files Browse the repository at this point in the history
…ir/analyzer/comute_alias_columns

Revert "Analyzer: compute ALIAS columns right after reading"
  • Loading branch information
tavplubix authored Feb 29, 2024
2 parents fb099bb + 2dedfd6 commit 8141e1c
Show file tree
Hide file tree
Showing 16 changed files with 227 additions and 241 deletions.
5 changes: 2 additions & 3 deletions src/Analyzer/Passes/QueryAnalysisPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6651,6 +6651,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
{
auto alias_expression = buildQueryTree(column_default->expression, scope.context);
alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, scope.context, false /*resolve*/);
auto column_node = std::make_shared<ColumnNode>(column_name_and_type, std::move(alias_expression), table_expression_node);
column_name_to_column_node.emplace(column_name_and_type.name, column_node);
alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node);
Expand Down Expand Up @@ -6683,9 +6684,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
alias_column_resolve_scope,
false /*allow_lambda_expression*/,
false /*allow_table_expression*/);
auto & resolved_expression = alias_column_to_resolve->getExpression();
if (!resolved_expression->getResultType()->equals(*alias_column_to_resolve->getResultType()))
resolved_expression = buildCastFunction(resolved_expression, alias_column_to_resolve->getResultType(), scope.context, true);

column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node);
column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve;
}
Expand Down
7 changes: 1 addition & 6 deletions src/Interpreters/getHeaderForProcessingStage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,7 @@ Block getHeaderForProcessingStage(

auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(left_table_expression);
const auto & query_context = query_info.planner_context->getQueryContext();

NamesAndTypes columns;
const auto & column_name_to_column = table_expression_data.getColumnNameToColumn();
for (const auto & column_name : table_expression_data.getSelectedColumnsNames())
columns.push_back(column_name_to_column.at(column_name));

auto columns = table_expression_data.getColumns();
auto new_query_node = buildSubqueryToReadColumnsFromTableExpression(columns, left_table_expression, query_context);
query = new_query_node->toAST();
}
Expand Down
160 changes: 65 additions & 95 deletions src/Planner/CollectTableExpressionData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,34 @@ namespace
class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor<CollectSourceColumnsVisitor>
{
public:
explicit CollectSourceColumnsVisitor(PlannerContextPtr & planner_context_, bool keep_alias_columns_ = true)
explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_)
: planner_context(planner_context_)
, keep_alias_columns(keep_alias_columns_)
{}

void visitImpl(QueryTreeNodePtr & node)
{
/// Special case for USING clause which contains references to ALIAS columns.
/// We cannot modify such a ColumnNode.
if (auto * join_node = node->as<JoinNode>())
{
if (!join_node->isUsingJoinExpression())
return;

auto & using_list = join_node->getJoinExpression()->as<ListNode&>();
for (auto & using_element : using_list)
{
auto & column_node = using_element->as<ColumnNode&>();
/// This list contains column nodes from left and right tables.
auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as<ListNode&>().getNodes();

/// Visit left table column node.
visitUsingColumn(columns_from_subtrees[0]);
/// Visit right table column node.
visitUsingColumn(columns_from_subtrees[1]);
}
return;
}

auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;
Expand All @@ -51,55 +72,22 @@ class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor<CollectSource

/// JOIN using expression
if (column_node->hasExpression() && column_source_node_type == QueryTreeNodeType::JOIN)
{
auto & columns_from_subtrees = column_node->getExpression()->as<ListNode &>().getNodes();
if (columns_from_subtrees.size() != 2)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected two columns in JOIN using expression for column {}", column_node->dumpTree());

visit(columns_from_subtrees[0]);
visit(columns_from_subtrees[1]);
return;
}

auto & table_expression_data = planner_context->getOrCreateTableExpressionData(column_source_node);
auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node);

if (isAliasColumn(node))
if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN)
{
/// Column is an ALIAS column with expression
/// Replace ALIAS column with expression
bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
if (!column_already_exists)
{
CollectSourceColumnsVisitor visitor_for_alias_column(planner_context);
/// While we are processing expression of ALIAS columns we should not add source columns to selected.
/// See also comment for `select_added_columns`
visitor_for_alias_column.select_added_columns = false;
visitor_for_alias_column.keep_alias_columns = keep_alias_columns;
visitor_for_alias_column.visit(column_node->getExpression());

if (!keep_alias_columns)
{
/// For PREWHERE we can just replace ALIAS column with its expression,
/// because ActionsDAG for PREWHERE applied right on top of table expression
/// and cannot affect subqueries or other table expressions.
node = column_node->getExpression();
return;
}

auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node);

ActionsDAGPtr alias_column_actions_dag = std::make_shared<ActionsDAG>();
PlannerActionsVisitor actions_visitor(planner_context, false);
auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression());
if (outputs.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size());
const auto & column_name = column_node->getColumnName();
const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name);
alias_column_actions_dag->addOrReplaceInOutputs(alias_node);
table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns);
auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
}

node = column_node->getExpression();
visitImpl(node);
return;
}

Expand All @@ -114,58 +102,45 @@ class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor<CollectSource

bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
if (column_already_exists)
{
/// Column may be added when we collected data for ALIAS column
/// But now we see it directly in the query, so make sure it's marked as selected
if (select_added_columns)
table_expression_data.markSelectedColumn(column_node->getColumnName());
return;
}

auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node);
table_expression_data.addColumn(column_node->getColumn(), column_identifier, select_added_columns);
auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
table_expression_data.addColumn(column_node->getColumn(), column_identifier);
}

/// Returns true when `node` is a ColumnNode that has an expression and whose column source
/// exists and is neither a JOIN node nor an ARRAY_JOIN node.
/// Returns false for any other node, for a column without an expression,
/// and for a column whose source is null.
static bool isAliasColumn(const QueryTreeNodePtr & node)
{
const auto * column_node = node->as<ColumnNode>();
/// Not a column at all, or a plain column without an attached expression.
if (!column_node || !column_node->hasExpression())
return false;
const auto & column_source = column_node->getColumnSourceOrNull();
/// Without a source node the kind of column cannot be determined, so it is not treated as ALIAS.
if (!column_source)
return false;
/// Columns with an expression whose source is a JOIN or ARRAY_JOIN node are excluded here
/// (presumably JOIN USING / ARRAY JOIN columns rather than table ALIAS columns — confirm against callers).
return column_source->getNodeType() != QueryTreeNodeType::JOIN &&
column_source->getNodeType() != QueryTreeNodeType::ARRAY_JOIN;
}

static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr & child_node)
static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node)
{
if (auto * join_node = parent->as<JoinNode>())
{
if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression())
return false;
}
auto child_node_type = child_node->getNodeType();
return !(child_node_type == QueryTreeNodeType::QUERY ||
child_node_type == QueryTreeNodeType::UNION ||
isAliasColumn(parent_node));
return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
}

void setKeepAliasColumns(bool keep_alias_columns_)
private:

void visitUsingColumn(QueryTreeNodePtr & node)
{
keep_alias_columns = keep_alias_columns_;
auto & column_node = node->as<ColumnNode&>();
if (column_node.hasExpression())
{
auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource());
bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName());
if (column_already_exists)
return;

auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier);

visitImpl(column_node.getExpressionOrThrow());
}
else
visitImpl(node);
}

private:
PlannerContextPtr & planner_context;

/// Replace ALIAS columns with their expressions or register them in table expression data.
/// Usually we can replace them when we build some "local" actions DAG
/// (for example Row Policy or PREWHERE) that is applied on top of the table expression.
/// In other cases, we keep ALIAS columns as ColumnNode with an expression child node,
/// and handle them in the Planner by inserting ActionsDAG to compute them after reading from storage.
bool keep_alias_columns = true;

/// Flag `select_added_columns` indicates if we should mark column as explicitly selected.
/// For example, for table with columns (a Int32, b ALIAS a+1) and query SELECT b FROM table
/// Column `b` is selected explicitly by the user, but `a` is not (although it is still read).
/// Distinguishing such columns is important for checking access rights for ALIAS columns.
bool select_added_columns = true;
PlannerContext & planner_context;
};

class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor<CollectPrewhereTableExpressionVisitor>
Expand Down Expand Up @@ -299,7 +274,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr
}
}

CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context);
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
for (auto & node : query_node_typed.getChildren())
{
if (!node || node == query_node_typed.getPrewhere())
Expand All @@ -325,26 +300,21 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr
}

auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression);
const auto & read_column_names = table_expression_data.getColumnNames();
NameSet required_column_names_without_prewhere(read_column_names.begin(), read_column_names.end());
const auto & selected_column_names = table_expression_data.getSelectedColumnsNames();
required_column_names_without_prewhere.insert(selected_column_names.begin(), selected_column_names.end());
const auto & column_names = table_expression_data.getColumnNames();
NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end());

collect_source_columns_visitor.setKeepAliasColumns(false);
collect_source_columns_visitor.visit(query_node_typed.getPrewhere());

auto prewhere_actions_dag = std::make_shared<ActionsDAG>();

QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere();

PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/);
auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node);
auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere());
if (expression_nodes.size() != 1)
throw Exception(ErrorCodes::ILLEGAL_PREWHERE,
"Invalid PREWHERE. Expected single boolean expression. In query {}",
query_node->formatASTForErrorMessage());

prewhere_actions_dag->getOutputs().push_back(expression_nodes.back());
prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]);

for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs())
if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name))
Expand All @@ -354,9 +324,9 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr
}
}

void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns)
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context)
{
CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context, keep_alias_columns);
CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context);
collect_source_columns_visitor.visit(expression_node);
}

Expand Down
2 changes: 1 addition & 1 deletion src/Planner/CollectTableExpressionData.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr
*
* ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression.
*/
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns = true);
void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context);

}
5 changes: 1 addition & 4 deletions src/Planner/PlannerActionsVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,15 +451,13 @@ class PlannerActionsVisitorImpl
std::unordered_map<QueryTreeNodePtr, std::string> node_to_node_name;
const PlannerContextPtr planner_context;
ActionNodeNameHelper action_node_name_helper;
bool use_column_identifier_as_action_node_name;
};

/// Constructs the visitor implementation.
/// Stores `planner_context_`, initializes the action node name helper with the node-name cache,
/// the planner context and the `use_column_identifier_as_action_node_name_` flag, remembers that flag,
/// and pushes `actions_dag` as the first entry of the actions stack.
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag,
const PlannerContextPtr & planner_context_,
bool use_column_identifier_as_action_node_name_)
: planner_context(planner_context_)
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
{
/// The initial stack entry takes ownership of the given DAG; its second constructor argument is null.
actions_stack.emplace_back(std::move(actions_dag), nullptr);
}
Expand Down Expand Up @@ -505,8 +503,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi
{
auto column_node_name = action_node_name_helper.calculateActionNodeName(node);
const auto & column_node = node->as<ColumnNode &>();
if (column_node.hasExpression() && !use_column_identifier_as_action_node_name)
return visitImpl(column_node.getExpression());

Int64 actions_stack_size = static_cast<Int64>(actions_stack.size() - 1);
for (Int64 i = actions_stack_size; i >= 0; --i)
{
Expand Down
Loading

0 comments on commit 8141e1c

Please sign in to comment.