From 8d1dfa1de9c63043e6896861c3310a1e06b810b7 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 16 Oct 2019 16:03:08 +0800 Subject: [PATCH] planner: reset NotNull flag for schema of Apply and MaxOneRow properly (#12419) (#12694) --- cmd/explaintest/r/tpch.result | 2 +- planner/core/integration_test.go | 17 ++++++++++++ planner/core/logical_plan_builder.go | 39 ++++++++++++++++------------ planner/core/util.go | 20 +++++++++++--- 4 files changed, 57 insertions(+), 21 deletions(-) diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 45f0cdc023f06..027d22767a654 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -201,7 +201,7 @@ Projection_37 100.00 root tpch.supplier.s_acctbal, tpch.supplier.s_name, tpch.na │ └─TableReader_74 155496.00 root data:Selection_73 │ └─Selection_73 155496.00 cop eq(tpch.part.p_size, 30), like(tpch.part.p_type, "%STEEL", 92) │ └─TableScan_72 10000000.00 cop table:part, range:[-inf,+inf], keep order:false - └─Selection_75 6524008.35 root not(isnull(min(ps_supplycost))) + └─Selection_75 6524008.35 root not(isnull(19_col_0)) └─HashAgg_78 8155010.44 root group by:tpch.partsupp.ps_partkey, funcs:min(tpch.partsupp.ps_supplycost), firstrow(tpch.partsupp.ps_partkey) └─HashRightJoin_82 8155010.44 root inner join, inner:HashRightJoin_84, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] ├─HashRightJoin_84 100000.00 root inner join, inner:HashRightJoin_90, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 21f2ad151f7c5..e65a4e844ad03 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -105,3 +105,20 @@ func (s *testIntegrationSuite) runTestsWithTestData(caseName string, tk *testkit tk.MustQuery(tt).Check(testkit.Rows(output[i].Plan...)) } } + +func (s *testIntegrationSuite) TestApplyNotNullFlag(c *C) { + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + tk := testkit.NewTestKit(c, store) + defer func() { + dom.Close() + store.Close() + }() + tk.MustExec("use test") + tk.MustExec("drop table if exists t1, t2") + tk.MustExec("create table t1(x int not null)") + tk.MustExec("create table t2(x int)") + tk.MustExec("insert into t2 values (1)") + + tk.MustQuery("select IFNULL((select t1.x from t1 where t1.x = t2.x), 'xxx') as col1 from t2").Check(testkit.Rows("xxx")) +} diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 01ea7053aef84..5ab28bd2e8e00 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -231,14 +231,14 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der arg0, lOK := binop.GetArgs()[0].(*expression.Column) arg1, rOK := binop.GetArgs()[1].(*expression.Column) if lOK && rOK { - var leftCol, rightCol *expression.Column - if left.Schema().Contains(arg0) && right.Schema().Contains(arg1) { - leftCol, rightCol = arg0, arg1 + leftCol := left.Schema().RetrieveColumn(arg0) + rightCol := right.Schema().RetrieveColumn(arg1) + if leftCol == nil || rightCol == nil { + leftCol = left.Schema().RetrieveColumn(arg1) + rightCol = right.Schema().RetrieveColumn(arg0) + arg0, arg1 = arg1, arg0 } - if leftCol == nil && left.Schema().Contains(arg1) && right.Schema().Contains(arg0) { - leftCol, rightCol = arg1, arg0 - } - if leftCol != nil { + if leftCol != nil && rightCol != nil { // Do not derive `is not null` for anti join, since it may cause wrong results. // For example: // `select * from t t1 where t1.a not in (select b from t t2)` does not imply `t2.b is not null`, @@ -256,16 +256,16 @@ func (p *LogicalJoin) extractOnCondition(conditions []expression.Expression, der rightCond = append(rightCond, notNullExpr) } } - } - // For quries like `select a in (select a from s where s.b = t.b) from t`, - // if subquery is empty caused by `s.b = t.b`, the result should always be - // false even if t.a is null or s.a is null. To make this join "empty aware", - // we should differentiate `t.a = s.a` from other column equal conditions, so - // we put it into OtherConditions instead of EqualConditions of join. - if leftCol != nil && binop.FuncName.L == ast.EQ && !leftCol.InOperand && !rightCol.InOperand { - cond := expression.NewFunctionInternal(ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), leftCol, rightCol) - eqCond = append(eqCond, cond.(*expression.ScalarFunction)) - continue + // For queries like `select a in (select a from s where s.b = t.b) from t`, + // if subquery is empty caused by `s.b = t.b`, the result should always be + // false even if t.a is null or s.a is null. To make this join "empty aware", + // we should differentiate `t.a = s.a` from other column equal conditions, so + // we put it into OtherConditions instead of EqualConditions of join. + if binop.FuncName.L == ast.EQ && !arg0.InOperand && !arg1.InOperand { + cond := expression.NewFunctionInternal(ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), arg0, arg1) + eqCond = append(eqCond, cond.(*expression.ScalarFunction)) + continue + } } } } @@ -2461,6 +2461,11 @@ func (b *PlanBuilder) buildApplyWithJoinType(outerPlan, innerPlan LogicalPlan, t ap := LogicalApply{LogicalJoin: LogicalJoin{JoinType: tp}}.Init(b.ctx) ap.SetChildren(outerPlan, innerPlan) ap.SetSchema(expression.MergeSchema(outerPlan.Schema(), innerPlan.Schema())) + // Note that, tp can only be LeftOuterJoin or InnerJoin, so we don't consider other outer joins. + if tp == LeftOuterJoin { + b.optFlag = b.optFlag | flagEliminateOuterJoin + resetNotNullFlag(ap.schema, outerPlan.Schema().Len(), ap.schema.Len()) + } for i := outerPlan.Schema().Len(); i < ap.Schema().Len(); i++ { ap.schema.Columns[i].IsReferenced = true } diff --git a/planner/core/util.go b/planner/core/util.go index d1ec877b3cb27..4d6fd39973712 100644 --- a/planner/core/util.go +++ b/planner/core/util.go @@ -129,16 +129,30 @@ func (s *baseSchemaProducer) SetSchema(schema *expression.Schema) { s.schema = schema } +// Schema implements the Plan.Schema interface. +func (p *LogicalMaxOneRow) Schema() *expression.Schema { + s := p.Children()[0].Schema().Clone() + resetNotNullFlag(s, 0, s.Len()) + return s +} + func buildLogicalJoinSchema(joinType JoinType, join LogicalPlan) *expression.Schema { + leftSchema := join.Children()[0].Schema() switch joinType { case SemiJoin, AntiSemiJoin: - return join.Children()[0].Schema().Clone() + return leftSchema.Clone() case LeftOuterSemiJoin, AntiLeftOuterSemiJoin: - newSchema := join.Children()[0].Schema().Clone() + newSchema := leftSchema.Clone() newSchema.Append(join.Schema().Columns[join.Schema().Len()-1]) return newSchema } - return expression.MergeSchema(join.Children()[0].Schema(), join.Children()[1].Schema()) + newSchema := expression.MergeSchema(leftSchema, join.Children()[1].Schema()) + if joinType == LeftOuterJoin { + resetNotNullFlag(newSchema, leftSchema.Len(), newSchema.Len()) + } else if joinType == RightOuterJoin { + resetNotNullFlag(newSchema, 0, leftSchema.Len()) + } + return newSchema } func buildPhysicalJoinSchema(joinType JoinType, join PhysicalPlan) *expression.Schema {