From e82e83ee39f7c9f6054cccecce9e79ed9efe029c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 28 Aug 2018 15:50:47 +0800 Subject: [PATCH 01/21] plan: convert in subquery to agg and inner join --- plan/expression_rewriter.go | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index b2811f8925d5c..139e5d6cefa74 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -670,9 +670,31 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } - er.p, er.err = er.b.buildSemiApply(er.p, np, expression.SplitCNFItems(checkCondition), asScalar, v.Not) - if er.err != nil { - return v, true + if !v.Not && !asScalar { + agg := er.b.buildDistinct(np, np.Schema().Len()) + eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg) + join := LogicalJoin{ + JoinType: InnerJoin, + EqualConditions: eq, + LeftConditions: left, + RightConditions: right, + OtherConditions: other, + }.init(er.ctx) + join.SetChildren(er.p, agg) + join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema)) + proj := LogicalProjection{}.init(er.ctx) + proj.Exprs = make([]expression.Expression, 0, er.p.Schema().Len()) + for _, col := range er.p.Schema().Columns { + proj.Exprs = append(proj.Exprs, col) + } + proj.SetSchema(er.p.Schema()) + proj.SetChildren(join) + er.p = proj + } else { + er.p, er.err = er.b.buildSemiApply(er.p, np, expression.SplitCNFItems(checkCondition), asScalar, v.Not) + if er.err != nil { + return v, true + } } if asScalar { From 4cac41f16d9d5ba913f9e3de13ab9400369944bf Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 13:47:25 +0800 Subject: [PATCH 02/21] Modify rule. So it won't be worse than before. --- plan/expression_rewriter.go | 21 +++++++++++++-------- plan/optimizer.go | 4 ++++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index 139e5d6cefa74..dc9b4e1fc95d4 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -670,8 +670,13 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } - if !v.Not && !asScalar { + if !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { + er.b.optFlag |= flagEliminateAgg + er.b.optFlag |= flagEliminateProjection2 agg := er.b.buildDistinct(np, np.Schema().Len()) + for _, col := range agg.schema.Columns { + col.IsAggOrSubq = true + } eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg) join := LogicalJoin{ JoinType: InnerJoin, @@ -682,14 +687,14 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, }.init(er.ctx) join.SetChildren(er.p, agg) join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema)) - proj := LogicalProjection{}.init(er.ctx) - proj.Exprs = make([]expression.Expression, 0, er.p.Schema().Len()) - for _, col := range er.p.Schema().Columns { - proj.Exprs = append(proj.Exprs, col) + // Apply forces to choose hash join currently, so don't worry the hints will take effect if the semi join is in one apply. + if er.b.TableHints() != nil { + er.err = join.setPreferredJoinType(er.b.TableHints()) + if er.err != nil { + return v, true + } } - proj.SetSchema(er.p.Schema()) - proj.SetChildren(join) - er.p = proj + er.p = join } else { er.p, er.err = er.b.buildSemiApply(er.p, np, expression.SplitCNFItems(checkCondition), asScalar, v.Not) if er.err != nil { diff --git a/plan/optimizer.go b/plan/optimizer.go index 63cc202b0a8f4..60823dbb801fd 100644 --- a/plan/optimizer.go +++ b/plan/optimizer.go @@ -31,7 +31,9 @@ const ( flagPrunColumns uint64 = 1 << iota flagEliminateProjection flagBuildKeyInfo + flagEliminateAgg flagDecorrelate + flagEliminateProjection2 flagMaxMinEliminate flagPredicatePushDown flagPartitionProcessor @@ -43,7 +45,9 @@ var optRuleList = []logicalOptRule{ &columnPruner{}, &projectionEliminater{}, &buildKeySolver{}, + &aggregationEliminater{}, &decorrelateSolver{}, + &projectionEliminater{}, &maxMinEliminator{}, &ppdSolver{}, &partitionProcessor{}, From 6c06056d0a373cc3b660a9fda97c23b38da14de8 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 13:56:39 +0800 Subject: [PATCH 03/21] rename the variable --- executor/aggregate_test.go | 4 ++-- executor/join_test.go | 4 ++-- plan/expression_rewriter.go | 34 +------------------------------- session/session.go | 2 +- sessionctx/variable/session.go | 8 ++++---- sessionctx/variable/sysvar.go | 2 +- sessionctx/variable/tidb_vars.go | 6 +++--- sessionctx/variable/varsutil.go | 2 +- 8 files changed, 15 insertions(+), 47 deletions(-) diff --git a/executor/aggregate_test.go b/executor/aggregate_test.go index 3ce424ca18106..51d3f15806036 100644 --- a/executor/aggregate_test.go +++ b/executor/aggregate_test.go @@ -248,10 +248,10 @@ func (s *testSuite) TestAggregation(c *C) { tk.MustExec("create table t2 (c1 int)") tk.MustExec("insert into t1 values(3), (2)") tk.MustExec("insert into t2 values(1), (2)") - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 1") + tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 0") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 0") + tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 1") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) result = tk.MustQuery("select sum(c1) k from (select * from t1 union all select * from t2)t group by c1 * 2 order by k") diff --git a/executor/join_test.go b/executor/join_test.go index c8a9b8cad5c58..03471b17703f2 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -726,14 +726,14 @@ func (s *testSuite) TestInSubquery(c *C) { tk.MustExec("create table t2 (a int)") tk.MustExec("insert into t1 values (1),(2)") tk.MustExec("insert into t2 values (1),(2)") - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 1") + tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 0") result = tk.MustQuery("select * from t1 where a in (select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") result.Check(testkit.Rows()) result = tk.MustQuery("select * from t1 where a not in (select * from t2 where false)") result.Sort().Check(testkit.Rows("1", "2")) - tk.MustExec("set @@session.tidb_opt_insubquery_unfold = 0") + tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 1") result = tk.MustQuery("select * from t1 where a in (select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index dc9b4e1fc95d4..4c05c7dd7d267 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -617,38 +617,6 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = expression.ErrOperandColumns.GenByArgs(lLen) return v, true } - // Sometimes we can unfold the in subquery. For example, a in (select * from t) can rewrite to `a in (1,2,3,4)`. - // TODO: Now we cannot add it to CBO framework. Instead, user can set a session variable to open this optimization. - // We will improve our CBO framework in future. - if lLen == 1 && er.ctx.GetSessionVars().AllowInSubqueryUnFolding && len(np.extractCorrelatedCols()) == 0 { - physicalPlan, err1 := doOptimize(er.b.optFlag, np) - if err1 != nil { - er.err = errors.Trace(err1) - return v, true - } - rows, err1 := EvalSubquery(physicalPlan, er.b.is, er.b.ctx) - if err1 != nil { - er.err = errors.Trace(err1) - return v, true - } - for _, row := range rows { - con := &expression.Constant{ - Value: row[0], - RetType: np.Schema().Columns[0].GetType(), - } - er.ctxStack = append(er.ctxStack, con) - } - listLen := len(rows) - if listLen == 0 { - er.ctxStack[len(er.ctxStack)-1] = &expression.Constant{ - Value: types.NewDatum(v.Not), - RetType: types.NewFieldType(mysql.TypeTiny), - } - } else { - er.inToExpression(listLen, v.Not, &v.Type) - } - return v, true - } var rexpr expression.Expression if np.Schema().Len() == 1 { rexpr = np.Schema().Columns[0] @@ -670,7 +638,7 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } - if !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { + if er.ctx.GetSessionVars().AllowInSubqueryRewriting && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { er.b.optFlag |= flagEliminateAgg er.b.optFlag |= flagEliminateProjection2 agg := er.b.buildDistinct(np, np.Schema().Len()) diff --git a/session/session.go b/session/session.go index d76313e581abd..8fab04209f3b7 100644 --- a/session/session.go +++ b/session/session.go @@ -1263,7 +1263,7 @@ const loadCommonGlobalVarsSQL = "select HIGH_PRIORITY * from mysql.global_variab variable.TiDBHashAggFinalConcurrency + quoteCommaQuote + variable.TiDBBackoffLockFast + quoteCommaQuote + variable.TiDBDDLReorgWorkerCount + quoteCommaQuote + - variable.TiDBOptInSubqUnFolding + quoteCommaQuote + + variable.TiDBOptInSubqRewriting + quoteCommaQuote + variable.TiDBDistSQLScanConcurrency + quoteCommaQuote + variable.TiDBMaxChunkSize + quoteCommaQuote + variable.TiDBRetryLimit + quoteCommaQuote + diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index f15e4cae39c8c..2a0baf978c8a8 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -248,8 +248,8 @@ type SessionVars struct { // AllowAggPushDown can be set to false to forbid aggregation push down. AllowAggPushDown bool - // AllowInSubqueryUnFolding can be set to true to fold in subquery - AllowInSubqueryUnFolding bool + // AllowInSubqueryRewriting can be set to true to fold in subquery + AllowInSubqueryRewriting bool // CurrInsertValues is used to record current ValuesExpr's values. // See http://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values @@ -509,8 +509,8 @@ func (s *SessionVars) SetSystemVar(name string, val string) error { s.SkipUTF8Check = TiDBOptOn(val) case TiDBOptAggPushDown: s.AllowAggPushDown = TiDBOptOn(val) - case TiDBOptInSubqUnFolding: - s.AllowInSubqueryUnFolding = TiDBOptOn(val) + case TiDBOptInSubqRewriting: + s.AllowInSubqueryRewriting = TiDBOptOn(val) case TiDBIndexLookupConcurrency: s.IndexLookupConcurrency = tidbOptPositiveInt32(val, DefIndexLookupConcurrency) case TiDBIndexLookupJoinConcurrency: diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index d62b2bea3ebe8..87b5902818bfe 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -625,7 +625,7 @@ var defaultSysVars = []*SysVar{ {ScopeGlobal, TiDBAutoAnalyzeRatio, strconv.FormatFloat(DefAutoAnalyzeRatio, 'f', -1, 64)}, {ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)}, {ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)}, - {ScopeGlobal | ScopeSession, TiDBOptInSubqUnFolding, boolToIntStr(DefOptInSubqUnfolding)}, + {ScopeGlobal | ScopeSession, TiDBOptInSubqRewriting, boolToIntStr(DefOptInSubqRewriting)}, {ScopeGlobal | ScopeSession, TiDBIndexJoinBatchSize, strconv.Itoa(DefIndexJoinBatchSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupSize, strconv.Itoa(DefIndexLookupSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupConcurrency, strconv.Itoa(DefIndexLookupConcurrency)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 7909cf85c1d1d..f242f9e5619e6 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -113,8 +113,8 @@ const ( // If the query has a LIMIT clause, high concurrency makes the system do much more work than needed. TiDBDistSQLScanConcurrency = "tidb_distsql_scan_concurrency" - // tidb_opt_insubquery_unfold is used to enable/disable the optimizer rule of in subquery unfold. - TiDBOptInSubqUnFolding = "tidb_opt_insubquery_unfold" + // tidb_opt_insubquery_rewriting is used to enable/disable the optimizer rule of in subquery unfold. + TiDBOptInSubqRewriting = "tidb_opt_insubquery_rewriting" // tidb_index_join_batch_size is used to set the batch size of a index lookup join. // The index lookup join fetches batches of data from outer executor and constructs ranges for inner executor. @@ -192,7 +192,7 @@ const ( DefChecksumTableConcurrency = 4 DefSkipUTF8Check = false DefOptAggPushDown = false - DefOptInSubqUnfolding = false + DefOptInSubqRewriting = true DefBatchInsert = false DefBatchDelete = false DefCurretTS = 0 diff --git a/sessionctx/variable/varsutil.go b/sessionctx/variable/varsutil.go index c11f8501db71a..a31eabbbb1991 100644 --- a/sessionctx/variable/varsutil.go +++ b/sessionctx/variable/varsutil.go @@ -278,7 +278,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string, } return value, ErrWrongValueForVar.GenByArgs(name, value) case AutocommitVar, TiDBSkipUTF8Check, TiDBOptAggPushDown, - TiDBOptInSubqUnFolding, TiDBEnableTablePartition, + TiDBOptInSubqRewriting, TiDBEnableTablePartition, TiDBBatchInsert, TiDBDisableTxnAutoRetry, TiDBEnableStreaming, TiDBBatchDelete: if strings.EqualFold(value, "ON") || value == "1" || strings.EqualFold(value, "OFF") || value == "0" { From 7b2260578eac099aeebad110ab426dd736c4fc5f Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 14:11:38 +0800 Subject: [PATCH 04/21] fix the unit tests. --- plan/cbo_test.go | 2 +- plan/physical_plan_test.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plan/cbo_test.go b/plan/cbo_test.go index 832ac06ede355..b8bd4138c3d1b 100644 --- a/plan/cbo_test.go +++ b/plan/cbo_test.go @@ -355,7 +355,7 @@ func (s *testAnalyzeSuite) TestEmptyTable(c *C) { }, { sql: "select * from t where c1 in (select c1 from t1)", - best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1))}(test.t.c1,test.t1.c1)", + best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t1)->HashAgg)->HashAgg}(test.t.c1,test.t1.c1)->Projection", }, { sql: "select * from t, t1 where t.c1 = t1.c1", diff --git a/plan/physical_plan_test.go b/plan/physical_plan_test.go index 8fc193f08b381..0f7f7735517d1 100644 --- a/plan/physical_plan_test.go +++ b/plan/physical_plan_test.go @@ -388,12 +388,12 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) { // Test Semi Join hint success. { sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 where t1.a in (select a from t t2)", - best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", + best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)->Projection", }, // Test Semi Join hint fail. { sql: "select /*+ TIDB_INLJ(t2) */ * from t t1 where t1.a in (select a from t t2)", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.a,t2.a)", + best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t2.a,t1.a)->Projection", }, { sql: "select /*+ TIDB_INLJ(t1) */ * from t t1 join t t2 where t1.c=t2.c and t1.f=t2.f", @@ -452,7 +452,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSubquery(c *C) { //}, { sql: "select * from t where a in (select s.a from t s) order by t.a", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,s.a)", + best: "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,s.a)->Projection", }, // Test Nested sub query. { @@ -462,7 +462,7 @@ func (s *testPlanSuite) TestDAGPlanBuilderSubquery(c *C) { // Test Semi Join + Order by. { sql: "select * from t where a in (select a from t) order by b", - best: "MergeSemiJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Sort", + best: "MergeInnerJoin{TableReader(Table(t))->TableReader(Table(t))}(test.t.a,test.t.a)->Projection->Sort", }, // Test Apply. { From 500b54ab7a7c097197490d346ef39bee8c718ee7 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 14:12:19 +0800 Subject: [PATCH 05/21] add new rule to eliminate agg by key. --- plan/rule_aggregation_elimination.go | 126 +++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 plan/rule_aggregation_elimination.go diff --git a/plan/rule_aggregation_elimination.go b/plan/rule_aggregation_elimination.go new file mode 100644 index 0000000000000..d3da642e22451 --- /dev/null +++ b/plan/rule_aggregation_elimination.go @@ -0,0 +1,126 @@ +// Copyright 2018 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package plan + +import ( + "github.com/pingcap/tidb/ast" + "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/expression/aggregation" + "github.com/pingcap/tidb/mysql" + "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/types" +) + +type aggregationEliminater struct { +} + +// tryToEliminateAggregation will eliminate aggregation grouped by unique key. +// e.g. select min(b) from t group by a. If a is a unique key, then this sql is equal to `select b from t group by a`. +// For count(expr), sum(expr), avg(expr), count(distinct expr, [expr...]) we may need to rewrite the expr. Details are shown below. +// If we can eliminate agg successful, we return a projection. Else we return a nil pointer. +func (a *aggregationEliminater) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection { + schemaByGroupby := expression.NewSchema(agg.groupByCols...) + coveredByUniqueKey := false + for _, key := range agg.children[0].Schema().Keys { + if schemaByGroupby.ColumnsIndices(key) != nil { + coveredByUniqueKey = true + break + } + } + if coveredByUniqueKey { + // GroupByCols has unique key, so this aggregation can be removed. + proj := a.convertAggToProj(agg) + proj.SetChildren(agg.children[0]) + return proj + } + return nil +} + +func (a *aggregationEliminater) convertAggToProj(agg *LogicalAggregation) *LogicalProjection { + proj := LogicalProjection{ + Exprs: make([]expression.Expression, 0, len(agg.AggFuncs)), + }.init(agg.ctx) + for _, fun := range agg.AggFuncs { + expr := a.rewriteExpr(agg.ctx, fun) + proj.Exprs = append(proj.Exprs, expr) + } + proj.SetSchema(agg.schema.Clone()) + return proj +} + +// rewriteExpr will rewrite the aggregate function to expression doesn't contain aggregate function. +func (a *aggregationEliminater) rewriteExpr(ctx sessionctx.Context, aggFunc *aggregation.AggFuncDesc) expression.Expression { + switch aggFunc.Name { + case ast.AggFuncCount: + if aggFunc.Mode == aggregation.FinalMode { + return a.rewriteSumOrAvg(ctx, aggFunc.Args) + } + return a.rewriteCount(ctx, aggFunc.Args) + case ast.AggFuncSum, ast.AggFuncAvg: + return a.rewriteSumOrAvg(ctx, aggFunc.Args) + default: + // Default we do nothing about expr. + return aggFunc.Args[0] + } +} + +func (a *aggregationEliminater) rewriteCount(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { + // If is count(expr), we will change it to if(isnull(expr), 0, 1). + // If is count(distinct x, y, z) we will change it to if(isnull(x) or isnull(y) or isnull(z), 0, 1). + isNullExprs := make([]expression.Expression, 0, len(exprs)) + for _, expr := range exprs { + isNullExpr := expression.NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), expr) + isNullExprs = append(isNullExprs, isNullExpr) + } + innerExpr := expression.ComposeDNFCondition(ctx, isNullExprs...) + newExpr := expression.NewFunctionInternal(ctx, ast.If, types.NewFieldType(mysql.TypeLonglong), innerExpr, expression.Zero, expression.One) + return newExpr +} + +// See https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html +// The SUM() and AVG() functions return a DECIMAL value for exact-value arguments (integer or DECIMAL), +// and a DOUBLE value for approximate-value arguments (FLOAT or DOUBLE). +func (a *aggregationEliminater) rewriteSumOrAvg(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { + // FIXME: Consider the case that avg is final mode. + expr := exprs[0] + switch expr.GetType().Tp { + // Integer type should be cast to decimal. + case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong: + return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeNewDecimal)) + // Double and Decimal doesn't need to be cast. + case mysql.TypeDouble, mysql.TypeNewDecimal: + return expr + // Float should be cast to double. And other non-numeric type should be cast to double too. + default: + return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeDouble)) + } +} + +func (a *aggregationEliminater) optimize(p LogicalPlan) (LogicalPlan, error) { + newChildren := make([]LogicalPlan, 0, len(p.Children())) + for _, child := range p.Children() { + newChild, _ := a.optimize(child) + newChildren = append(newChildren, newChild) + } + p.SetChildren(newChildren...) + agg, ok := p.(*LogicalAggregation) + if !ok { + return p, nil + } + if proj := a.tryToEliminateAggregation(agg); proj != nil { + return proj, nil + } + return p, nil +} + From 62705f68d4b6065ab831771271627084c9d5aa58 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 14:43:22 +0800 Subject: [PATCH 06/21] fix explain test. --- cmd/explaintest/r/explain_easy.result | 22 ++++-- cmd/explaintest/r/explain_easy_stats.result | 12 ++- cmd/explaintest/r/tpch.result | 83 +++++++++++---------- cmd/explaintest/t/explain_easy.test | 3 - cmd/explaintest/t/explain_easy_stats.test | 3 - 5 files changed, 64 insertions(+), 59 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index d08e79cde7ce6..cc6b6d4a6c191 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -5,7 +5,6 @@ create table t2 (c1 int unique, c2 int); insert into t2 values(1, 0), (2, 1); create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info @@ -87,14 +86,22 @@ TableReader_7 0.33 root data:Selection_6 └─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false, stats:pseudo explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info -StreamAgg_21 1.00 root funcs:sum(col_0) -└─TableReader_22 1.00 root data:StreamAgg_13 - └─StreamAgg_13 1.00 cop funcs:sum(in(test.t1.c1, 1, 2)) - └─TableScan_20 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo +StreamAgg_12 1.00 root funcs:sum(5_aux_0) +└─MergeJoin_28 10000.00 root left outer semi join, left key:test.t1.c1, right key:test.t2.c1 + ├─TableReader_19 10000.00 root data:TableScan_18 + │ └─TableScan_18 10000.00 cop table:t1, range:[-inf,+inf], keep order:true, stats:pseudo + └─IndexReader_23 10000.00 root index:IndexScan_22 + └─IndexScan_22 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info -TableReader_11 2.00 root data:TableScan_10 -└─TableScan_10 2.00 cop table:t1, range:[0,0], [1,1], keep order:false, stats:pseudo +Projection_8 10000.00 root test.t1.c1 +└─IndexJoin_11 10000.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, inner key:test.t1.c1 + ├─TableReader_10 10.00 root data:TableScan_9 + │ └─TableScan_9 10.00 cop table:t1, range: decided by [test.t2.c2], keep order:false, stats:pseudo + └─HashAgg_18 8000.00 root group by:col_1, funcs:firstrow(col_0) + └─TableReader_19 8000.00 root data:HashAgg_14 + └─HashAgg_14 8000.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) + └─TableScan_17 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select (select count(1) k from t1 s where s.c1 = t1.c1 having k != 0) from t1; id count task operator info Projection_13 10000.00 root k @@ -181,7 +188,6 @@ HashAgg_15 24000.00 root group by:t2.c1, funcs:firstrow(join_agg_0) └─IndexReader_64 8000.00 root index:StreamAgg_54 └─StreamAgg_54 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) └─IndexScan_62 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo -set @@session.tidb_opt_insubquery_unfold = 0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(5_aux_0) diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 7aec0dda5f6fe..1c29ae19b3c1d 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -8,7 +8,6 @@ create table t3 (a bigint, b bigint, c bigint, d bigint); load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info @@ -93,8 +92,14 @@ TableReader_7 0.50 root data:Selection_6 └─TableScan_5 1.00 cop table:t1, range:[1,1], keep order:false explain select c1 from t1 where c1 in (select c2 from t2); id count task operator info -Projection_10 0.00 root test.t1.c1 -└─TableDual_11 0.00 root rows:0 +Projection_8 1985.00 root test.t1.c1 +└─IndexJoin_11 1985.00 root inner join, inner:TableReader_10, outer key:test.t2.c2, inner key:test.t1.c1 + ├─TableReader_10 1.00 root data:TableScan_9 + │ └─TableScan_9 1.00 cop table:t1, range: decided by [test.t2.c2], keep order:false + └─HashAgg_18 1985.00 root group by:col_1, funcs:firstrow(col_0) + └─TableReader_19 1985.00 root data:HashAgg_14 + └─HashAgg_14 1985.00 cop group by:test.t2.c2, funcs:firstrow(test.t2.c2) + └─TableScan_17 1985.00 cop table:t2, range:[-inf,+inf], keep order:false explain select * from information_schema.columns; id count task operator info MemTableScan_4 10000.00 root @@ -115,7 +120,6 @@ Limit_10 1.00 root offset:0, count:1 └─TableReader_21 1.00 root data:Limit_20 └─Limit_20 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc -set @@session.tidb_opt_insubquery_unfold = 0; explain select 1 in (select c2 from t2) from t1; id count task operator info Projection_6 1999.00 root 5_aux_0 diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index cdc22f2d84837..a5869058332f3 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -1021,24 +1021,24 @@ o_totalprice desc, o_orderdate limit 100; id count task operator info -Projection_20 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 13_col_0 -└─TopN_23 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100 - └─HashAgg_26 60000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate) - └─HashLeftJoin_27 240004648.80 root semi join, inner:Selection_55, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] - ├─IndexJoin_32 300005811.00 root inner join, inner:IndexLookUp_31, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey - │ ├─HashRightJoin_48 75000000.00 root inner join, inner:TableReader_52, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] - │ │ ├─TableReader_52 7500000.00 root data:TableScan_51 - │ │ │ └─TableScan_51 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false - │ │ └─TableReader_50 75000000.00 root data:TableScan_49 - │ │ └─TableScan_49 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - │ └─IndexLookUp_31 1.00 root - │ ├─IndexScan_29 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false - │ └─TableScan_30 1.00 cop table:lineitem, keep order:false - └─Selection_55 59251097.60 root gt(sel_agg_2, 314) - └─HashAgg_62 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) - └─TableReader_63 74063872.00 root data:HashAgg_56 - └─HashAgg_56 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) - └─TableScan_61 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false +Projection_22 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 14_col_0 +└─TopN_25 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100 + └─HashAgg_28 75000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate) + └─HashLeftJoin_29 237008981.18 root inner join, inner:Selection_58, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] + ├─IndexJoin_35 300005811.00 root inner join, inner:IndexLookUp_34, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey + │ ├─HashRightJoin_51 75000000.00 root inner join, inner:TableReader_55, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] + │ │ ├─TableReader_55 7500000.00 root data:TableScan_54 + │ │ │ └─TableScan_54 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false + │ │ └─TableReader_53 75000000.00 root data:TableScan_52 + │ │ └─TableScan_52 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false + │ └─IndexLookUp_34 1.00 root + │ ├─IndexScan_32 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + │ └─TableScan_33 1.00 cop table:lineitem, keep order:false + └─Selection_58 59251097.60 root gt(sel_agg_2, 314) + └─HashAgg_65 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) + └─TableReader_66 74063872.00 root data:HashAgg_59 + └─HashAgg_59 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) + └─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q19 Discounted Revenue Query The Discounted Revenue Query reports the gross discounted revenue attributed to the sale of selected parts handled @@ -1140,29 +1140,30 @@ and n_name = 'ALGERIA' order by s_name; id count task operator info -Sort_23 16000.00 root tpch.supplier.s_name:asc -└─Projection_25 16000.00 root tpch.supplier.s_name, tpch.supplier.s_address - └─HashLeftJoin_26 16000.00 root semi join, inner:Projection_38, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] - ├─HashRightJoin_32 20000.00 root inner join, inner:TableReader_37, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] - │ ├─TableReader_37 1.00 root data:Selection_36 - │ │ └─Selection_36 1.00 cop eq(tpch.nation.n_name, "ALGERIA") - │ │ └─TableScan_35 25.00 cop table:nation, range:[-inf,+inf], keep order:false - │ └─TableReader_34 500000.00 root data:TableScan_33 - │ └─TableScan_33 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false - └─Projection_38 6363545.60 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, mul(0.5, 13_col_0) - └─Selection_39 6363545.60 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 13_col_0)) - └─HashAgg_42 7954432.00 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), sum(tpch.lineitem.l_quantity) - └─HashLeftJoin_45 177770004.55 root left outer join, inner:TableReader_66, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] - ├─MergeJoin_46 32000000.00 root semi join, left key:tpch.partsupp.ps_partkey, right key:tpch.part.p_partkey - │ ├─IndexLookUp_55 40000000.00 root - │ │ ├─IndexScan_53 40000000.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range:[NULL,+inf], keep order:true - │ │ └─TableScan_54 40000000.00 cop table:partsupp, keep order:false - │ └─TableReader_58 80007.93 root data:Selection_57 - │ └─Selection_57 80007.93 cop like(tpch.part.p_name, "green%", 92) - │ └─TableScan_56 10000000.00 cop table:part, range:[-inf,+inf], keep order:true - └─TableReader_66 44189356.65 root data:Selection_65 - └─Selection_65 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) - └─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false +Sort_26 20000.00 root tpch.supplier.s_name:asc +└─Projection_28 20000.00 root tpch.supplier.s_name, tpch.supplier.s_address + └─HashRightJoin_30 20000.00 root inner join, inner:HashRightJoin_36, equal:[eq(tpch.supplier.s_suppkey, tpch.partsupp.ps_suppkey)] + ├─HashRightJoin_36 20000.00 root inner join, inner:TableReader_41, equal:[eq(tpch.nation.n_nationkey, tpch.supplier.s_nationkey)] + │ ├─TableReader_41 1.00 root data:Selection_40 + │ │ └─Selection_40 1.00 cop eq(tpch.nation.n_name, "ALGERIA") + │ │ └─TableScan_39 25.00 cop table:nation, range:[-inf,+inf], keep order:false + │ └─TableReader_38 500000.00 root data:TableScan_37 + │ └─TableScan_37 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false + └─HashAgg_44 257492.04 root group by:tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_suppkey) + └─Projection_45 257492.04 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, tpch.part.p_partkey, mul(0.5, 14_col_0) + └─Selection_46 257492.04 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 14_col_0)) + └─HashAgg_49 321865.05 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), firstrow(tpch.part.p_partkey), sum(tpch.lineitem.l_quantity) + └─HashLeftJoin_52 9711455.06 root left outer join, inner:TableReader_78, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] + ├─IndexJoin_61 321865.05 root inner join, inner:IndexLookUp_60, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey + │ ├─IndexLookUp_60 1.00 root + │ │ ├─IndexScan_58 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ │ └─TableScan_59 1.00 cop table:partsupp, keep order:false + │ └─TableReader_73 80007.93 root data:Selection_72 + │ └─Selection_72 80007.93 cop like(tpch.part.p_name, "green%", 92) + │ └─TableScan_71 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + └─TableReader_78 44189356.65 root data:Selection_77 + └─Selection_77 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) + └─TableScan_76 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q21 Suppliers Who Kept Orders Waiting Query This query identifies certain suppliers who were not able to ship required parts in a timely manner. diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index 0254c755322de..a4ddde63df176 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -6,7 +6,6 @@ insert into t2 values(1, 0), (2, 1); create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); @@ -35,8 +34,6 @@ explain select if(10, t1.c1, t1.c2) from t1; explain select c1 from t2 union select c1 from t2 union all select c1 from t2; explain select c1 from t2 union all select c1 from t2 union select c1 from t2; -set @@session.tidb_opt_insubquery_unfold = 0; - explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; explain select sum(6 in (select c2 from t2)) from t1; diff --git a/cmd/explaintest/t/explain_easy_stats.test b/cmd/explaintest/t/explain_easy_stats.test index c54734c1bde01..53054f0e47dda 100644 --- a/cmd/explaintest/t/explain_easy_stats.test +++ b/cmd/explaintest/t/explain_easy_stats.test @@ -8,7 +8,6 @@ create table t3 (a bigint, b bigint, c bigint, d bigint); load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; -set @@session.tidb_opt_insubquery_unfold = 1; set @@session.tidb_opt_agg_push_down = 1; @@ -38,8 +37,6 @@ explain select * from information_schema.columns; explain select c2 = (select c2 from t2 where t1.c1 = t2.c1 order by c1 limit 1) from t1; explain select * from t1 order by c1 desc limit 1; -set @@session.tidb_opt_insubquery_unfold = 0; - # explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; # explain select sum(6 in (select c2 from t2)) from t1; From 9f1a980d731709c63147e826b17b5226de2cc3e0 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 15:02:22 +0800 Subject: [PATCH 07/21] variable is not initialized in session. --- sessionctx/variable/session.go | 1 + 1 file changed, 1 insertion(+) diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 2a0baf978c8a8..0b0e46c29b7e3 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -314,6 +314,7 @@ func NewSessionVars() *SessionVars { RetryLimit: DefTiDBRetryLimit, DisableTxnAutoRetry: DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, + AllowInSubqueryRewriting: DefOptInSubqRewriting, } vars.Concurrency = Concurrency{ IndexLookupConcurrency: DefIndexLookupConcurrency, From 72ff6ad806a240057d0e3073c3f3b3891afa944e Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 15:25:10 +0800 Subject: [PATCH 08/21] modify tpch comment. --- cmd/explaintest/r/tpch.result | 2 +- cmd/explaintest/t/tpch.test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index a5869058332f3..bc39756f2c60e 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -984,7 +984,7 @@ The Large Volume Customer Query ranks customers based on their having placed a l quantity orders are defined as those orders whose total quantity is above a certain level. The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders. The query lists the customer name, customer key, the order key, date and total price and the quantity for the order. -Planner enhancement: unfold in subquery. +Planner enhancement: cost estimation is not so good, join reorder. The inner subquery's result is only 300+ rows. */ explain select diff --git a/cmd/explaintest/t/tpch.test b/cmd/explaintest/t/tpch.test index 5d313d9cd11fb..6909c72ad7e6b 100644 --- a/cmd/explaintest/t/tpch.test +++ b/cmd/explaintest/t/tpch.test @@ -773,7 +773,7 @@ where The Large Volume Customer Query finds a list of the top 100 customers who have ever placed large quantity orders. The query lists the customer name, customer key, the order key, date and total price and the quantity for the order. - Planner enhancement: unfold in subquery. + Planner enhancement: cost estimation is not so good, join reorder. The inner subquery's result is only 300+ rows. */ explain select From 31372daadc096e0b808a201bb3e4236cb3164194 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 15:35:07 +0800 Subject: [PATCH 09/21] add comment. --- plan/expression_rewriter.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index 4c05c7dd7d267..dc7d4d062b4be 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -638,14 +638,18 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } + // If it's not the form of `not in (SUBQUERY)`, has no correlated column and don't need append a scalar value. We can rewrite it to inner join. if er.ctx.GetSessionVars().AllowInSubqueryRewriting && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { + // We need to try to eliminate the agg and the projection produced by this operation. er.b.optFlag |= flagEliminateAgg er.b.optFlag |= flagEliminateProjection2 + // Build distinct for the inner query. agg := er.b.buildDistinct(np, np.Schema().Len()) for _, col := range agg.schema.Columns { col.IsAggOrSubq = true } eq, left, right, other := extractOnCondition(expression.SplitCNFItems(checkCondition), er.p, agg) + // Build inner join above the aggregation. join := LogicalJoin{ JoinType: InnerJoin, EqualConditions: eq, @@ -655,7 +659,7 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, }.init(er.ctx) join.SetChildren(er.p, agg) join.SetSchema(expression.MergeSchema(er.p.Schema(), agg.schema)) - // Apply forces to choose hash join currently, so don't worry the hints will take effect if the semi join is in one apply. + // Set join hint for this join. if er.b.TableHints() != nil { er.err = join.setPreferredJoinType(er.b.TableHints()) if er.err != nil { From 48018ffdd112b9de87fb1e00e536155ce9e6f3ea Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 15:39:09 +0800 Subject: [PATCH 10/21] fix comment --- plan/expression_rewriter.go | 2 +- sessionctx/variable/session.go | 2 +- sessionctx/variable/tidb_vars.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index dc7d4d062b4be..5015c76e464a1 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -638,7 +638,7 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, er.err = errors.Trace(err) return v, true } - // If it's not the form of `not in (SUBQUERY)`, has no correlated column and don't need append a scalar value. We can rewrite it to inner join. + // If it's not the form of `not in (SUBQUERY)`, has no correlated column and don't need to append a scalar value. We can rewrite it to inner join. if er.ctx.GetSessionVars().AllowInSubqueryRewriting && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { // We need to try to eliminate the agg and the projection produced by this operation. er.b.optFlag |= flagEliminateAgg diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 0b0e46c29b7e3..c876e53acec58 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -248,7 +248,7 @@ type SessionVars struct { // AllowAggPushDown can be set to false to forbid aggregation push down. AllowAggPushDown bool - // AllowInSubqueryRewriting can be set to true to fold in subquery + // AllowInSubqueryRewriting can be set to false to forbid rewriting the semi join to inner join with agg. AllowInSubqueryRewriting bool // CurrInsertValues is used to record current ValuesExpr's values. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index f242f9e5619e6..1098d599a716c 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -113,7 +113,7 @@ const ( // If the query has a LIMIT clause, high concurrency makes the system do much more work than needed. TiDBDistSQLScanConcurrency = "tidb_distsql_scan_concurrency" - // tidb_opt_insubquery_rewriting is used to enable/disable the optimizer rule of in subquery unfold. + // tidb_opt_insubquery_rewriting is used to enable/disable the optimizer rule of rewriting IN subquery. TiDBOptInSubqRewriting = "tidb_opt_insubquery_rewriting" // tidb_index_join_batch_size is used to set the batch size of a index lookup join. From 8c8ed423fbde861ccbcac0f97c5ff3d64466e509 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 29 Aug 2018 16:18:02 +0800 Subject: [PATCH 11/21] fix gofmt. --- plan/rule_aggregation_elimination.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plan/rule_aggregation_elimination.go b/plan/rule_aggregation_elimination.go index d3da642e22451..4aa26b91af8c1 100644 --- a/plan/rule_aggregation_elimination.go +++ b/plan/rule_aggregation_elimination.go @@ -123,4 +123,3 @@ func (a *aggregationEliminater) optimize(p LogicalPlan) (LogicalPlan, error) { } return p, nil } - From 81ba7ecfe92f93d5c9ce3357a7b9a8e0067ac1aa Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 12 Sep 2018 15:51:16 +0800 Subject: [PATCH 12/21] extract same part into struct --- plan/optimizer.go | 2 +- plan/rule_aggregation_elimination.go | 18 +++--- plan/rule_aggregation_push_down.go | 83 +--------------------------- 3 files changed, 13 insertions(+), 90 deletions(-) diff --git a/plan/optimizer.go b/plan/optimizer.go index 2446b7f7b8460..ee7f4973f9cb5 100644 --- a/plan/optimizer.go +++ b/plan/optimizer.go @@ -46,7 +46,7 @@ var optRuleList = []logicalOptRule{ &columnPruner{}, &projectionEliminater{}, &buildKeySolver{}, - &aggregationEliminater{}, + &aggregationRecursiveEliminater{}, &decorrelateSolver{}, &projectionEliminater{}, &maxMinEliminator{}, diff --git a/plan/rule_aggregation_elimination.go b/plan/rule_aggregation_elimination.go index 4aa26b91af8c1..9ba17f83903ba 100644 --- a/plan/rule_aggregation_elimination.go +++ b/plan/rule_aggregation_elimination.go @@ -22,14 +22,18 @@ import ( "github.com/pingcap/tidb/types" ) -type aggregationEliminater struct { +type aggregationRecursiveEliminater struct { + aggregationEliminateChecker +} + +type aggregationEliminateChecker struct { } // tryToEliminateAggregation will eliminate aggregation grouped by unique key. // e.g. select min(b) from t group by a. If a is a unique key, then this sql is equal to `select b from t group by a`. // For count(expr), sum(expr), avg(expr), count(distinct expr, [expr...]) we may need to rewrite the expr. Details are shown below. // If we can eliminate agg successful, we return a projection. Else we return a nil pointer. -func (a *aggregationEliminater) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection { +func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection { schemaByGroupby := expression.NewSchema(agg.groupByCols...) coveredByUniqueKey := false for _, key := range agg.children[0].Schema().Keys { @@ -47,7 +51,7 @@ func (a *aggregationEliminater) tryToEliminateAggregation(agg *LogicalAggregatio return nil } -func (a *aggregationEliminater) convertAggToProj(agg *LogicalAggregation) *LogicalProjection { +func (a *aggregationEliminateChecker) convertAggToProj(agg *LogicalAggregation) *LogicalProjection { proj := LogicalProjection{ Exprs: make([]expression.Expression, 0, len(agg.AggFuncs)), }.init(agg.ctx) @@ -60,7 +64,7 @@ func (a *aggregationEliminater) convertAggToProj(agg *LogicalAggregation) *Logic } // rewriteExpr will rewrite the aggregate function to expression doesn't contain aggregate function. -func (a *aggregationEliminater) rewriteExpr(ctx sessionctx.Context, aggFunc *aggregation.AggFuncDesc) expression.Expression { +func (a *aggregationEliminateChecker) rewriteExpr(ctx sessionctx.Context, aggFunc *aggregation.AggFuncDesc) expression.Expression { switch aggFunc.Name { case ast.AggFuncCount: if aggFunc.Mode == aggregation.FinalMode { @@ -75,7 +79,7 @@ func (a *aggregationEliminater) rewriteExpr(ctx sessionctx.Context, aggFunc *agg } } -func (a *aggregationEliminater) rewriteCount(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { +func (a *aggregationEliminateChecker) rewriteCount(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { // If is count(expr), we will change it to if(isnull(expr), 0, 1). // If is count(distinct x, y, z) we will change it to if(isnull(x) or isnull(y) or isnull(z), 0, 1). isNullExprs := make([]expression.Expression, 0, len(exprs)) @@ -91,7 +95,7 @@ func (a *aggregationEliminater) rewriteCount(ctx sessionctx.Context, exprs []exp // See https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html // The SUM() and AVG() functions return a DECIMAL value for exact-value arguments (integer or DECIMAL), // and a DOUBLE value for approximate-value arguments (FLOAT or DOUBLE). -func (a *aggregationEliminater) rewriteSumOrAvg(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { +func (a *aggregationEliminateChecker) rewriteSumOrAvg(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { // FIXME: Consider the case that avg is final mode. expr := exprs[0] switch expr.GetType().Tp { @@ -107,7 +111,7 @@ func (a *aggregationEliminater) rewriteSumOrAvg(ctx sessionctx.Context, exprs [] } } -func (a *aggregationEliminater) optimize(p LogicalPlan) (LogicalPlan, error) { +func (a *aggregationRecursiveEliminater) optimize(p LogicalPlan) (LogicalPlan, error) { newChildren := make([]LogicalPlan, 0, len(p.Children())) for _, child := range p.Children() { newChild, _ := a.optimize(child) diff --git a/plan/rule_aggregation_push_down.go b/plan/rule_aggregation_push_down.go index 3089f1d5c9cfd..6fa0adde4f69d 100644 --- a/plan/rule_aggregation_push_down.go +++ b/plan/rule_aggregation_push_down.go @@ -25,6 +25,7 @@ import ( ) type aggregationOptimizer struct { + aggregationEliminateChecker } // isDecomposable checks if an aggregate function is decomposable. An aggregation function $F$ is decomposable @@ -383,85 +384,3 @@ func (a *aggregationOptimizer) aggPushDown(p LogicalPlan) LogicalPlan { p.SetChildren(newChildren...) return p } - -// tryToEliminateAggregation will eliminate aggregation grouped by unique key. -// e.g. select min(b) from t group by a. If a is a unique key, then this sql is equal to `select b from t group by a`. -// For count(expr), sum(expr), avg(expr), count(distinct expr, [expr...]) we may need to rewrite the expr. Details are shown below. -// If we can eliminate agg successful, we return a projection. Else we return a nil pointer. -func (a *aggregationOptimizer) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection { - schemaByGroupby := expression.NewSchema(agg.groupByCols...) - coveredByUniqueKey := false - for _, key := range agg.children[0].Schema().Keys { - if schemaByGroupby.ColumnsIndices(key) != nil { - coveredByUniqueKey = true - break - } - } - if coveredByUniqueKey { - // GroupByCols has unique key, so this aggregation can be removed. - proj := a.convertAggToProj(agg) - proj.SetChildren(agg.children[0]) - return proj - } - return nil -} - -func (a *aggregationOptimizer) convertAggToProj(agg *LogicalAggregation) *LogicalProjection { - proj := LogicalProjection{ - Exprs: make([]expression.Expression, 0, len(agg.AggFuncs)), - }.init(agg.ctx) - for _, fun := range agg.AggFuncs { - expr := a.rewriteExpr(agg.ctx, fun) - proj.Exprs = append(proj.Exprs, expr) - } - proj.SetSchema(agg.schema.Clone()) - return proj -} - -func (a *aggregationOptimizer) rewriteCount(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { - // If is count(expr), we will change it to if(isnull(expr), 0, 1). - // If is count(distinct x, y, z) we will change it to if(isnull(x) or isnull(y) or isnull(z), 0, 1). - isNullExprs := make([]expression.Expression, 0, len(exprs)) - for _, expr := range exprs { - isNullExpr := expression.NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), expr) - isNullExprs = append(isNullExprs, isNullExpr) - } - innerExpr := expression.ComposeDNFCondition(ctx, isNullExprs...) - newExpr := expression.NewFunctionInternal(ctx, ast.If, types.NewFieldType(mysql.TypeLonglong), innerExpr, expression.Zero, expression.One) - return newExpr -} - -// See https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html -// The SUM() and AVG() functions return a DECIMAL value for exact-value arguments (integer or DECIMAL), -// and a DOUBLE value for approximate-value arguments (FLOAT or DOUBLE). -func (a *aggregationOptimizer) rewriteSumOrAvg(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { - // FIXME: Consider the case that avg is final mode. - expr := exprs[0] - switch expr.GetType().Tp { - // Integer type should be cast to decimal. - case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong: - return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeNewDecimal)) - // Double and Decimal doesn't need to be cast. - case mysql.TypeDouble, mysql.TypeNewDecimal: - return expr - // Float should be cast to double. And other non-numeric type should be cast to double too. - default: - return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeDouble)) - } -} - -// rewriteExpr will rewrite the aggregate function to expression doesn't contain aggregate function. -func (a *aggregationOptimizer) rewriteExpr(ctx sessionctx.Context, aggFunc *aggregation.AggFuncDesc) expression.Expression { - switch aggFunc.Name { - case ast.AggFuncCount: - if aggFunc.Mode == aggregation.FinalMode { - return a.rewriteSumOrAvg(ctx, aggFunc.Args) - } - return a.rewriteCount(ctx, aggFunc.Args) - case ast.AggFuncSum, ast.AggFuncAvg: - return a.rewriteSumOrAvg(ctx, aggFunc.Args) - default: - // Default we do nothing about expr. - return aggFunc.Args[0] - } -} From d21347bdd634eef0c8e56740e7df607c3ed8af5f Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 12 Sep 2018 15:56:41 +0800 Subject: [PATCH 13/21] rename variable --- plan/expression_rewriter.go | 2 +- session/session.go | 2 +- sessionctx/variable/session.go | 10 +++++----- sessionctx/variable/sysvar.go | 2 +- sessionctx/variable/tidb_vars.go | 4 ++-- sessionctx/variable/varsutil.go | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/plan/expression_rewriter.go b/plan/expression_rewriter.go index 5015c76e464a1..3b2c4f22a06fb 100644 --- a/plan/expression_rewriter.go +++ b/plan/expression_rewriter.go @@ -639,7 +639,7 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, return v, true } // If it's not the form of `not in (SUBQUERY)`, has no correlated column and don't need to append a scalar value. We can rewrite it to inner join. - if er.ctx.GetSessionVars().AllowInSubqueryRewriting && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { + if er.ctx.GetSessionVars().AllowInSubqToJoinAndAgg && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { // We need to try to eliminate the agg and the projection produced by this operation. er.b.optFlag |= flagEliminateAgg er.b.optFlag |= flagEliminateProjection2 diff --git a/session/session.go b/session/session.go index 3c7ad6b59e74c..056092aa7c384 100644 --- a/session/session.go +++ b/session/session.go @@ -1247,7 +1247,7 @@ const loadCommonGlobalVarsSQL = "select HIGH_PRIORITY * from mysql.global_variab variable.TiDBHashAggFinalConcurrency + quoteCommaQuote + variable.TiDBBackoffLockFast + quoteCommaQuote + variable.TiDBDDLReorgWorkerCount + quoteCommaQuote + - variable.TiDBOptInSubqRewriting + quoteCommaQuote + + variable.TiDBOptInSubqToJoinAndAgg + quoteCommaQuote + variable.TiDBDistSQLScanConcurrency + quoteCommaQuote + variable.TiDBMaxChunkSize + quoteCommaQuote + variable.TiDBRetryLimit + quoteCommaQuote + diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 551baa593849b..30d3f2fa8256d 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -248,8 +248,8 @@ type SessionVars struct { // AllowAggPushDown can be set to false to forbid aggregation push down. AllowAggPushDown bool - // AllowInSubqueryRewriting can be set to false to forbid rewriting the semi join to inner join with agg. - AllowInSubqueryRewriting bool + // AllowInSubqToJoinAndAgg can be set to false to forbid rewriting the semi join to inner join with agg. + AllowInSubqToJoinAndAgg bool // CurrInsertValues is used to record current ValuesExpr's values. // See http://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values @@ -314,7 +314,7 @@ func NewSessionVars() *SessionVars { RetryLimit: DefTiDBRetryLimit, DisableTxnAutoRetry: DefTiDBDisableTxnAutoRetry, DDLReorgPriority: kv.PriorityLow, - AllowInSubqueryRewriting: DefOptInSubqRewriting, + AllowInSubqToJoinAndAgg: DefOptInSubqToJoinAndAgg, } vars.Concurrency = Concurrency{ IndexLookupConcurrency: DefIndexLookupConcurrency, @@ -510,8 +510,8 @@ func (s *SessionVars) SetSystemVar(name string, val string) error { s.SkipUTF8Check = TiDBOptOn(val) case TiDBOptAggPushDown: s.AllowAggPushDown = TiDBOptOn(val) - case TiDBOptInSubqRewriting: - s.AllowInSubqueryRewriting = TiDBOptOn(val) + case TiDBOptInSubqToJoinAndAgg: + s.AllowInSubqToJoinAndAgg = TiDBOptOn(val) case TiDBIndexLookupConcurrency: s.IndexLookupConcurrency = tidbOptPositiveInt32(val, DefIndexLookupConcurrency) case TiDBIndexLookupJoinConcurrency: diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 1469f5cccd485..2898c238a7b22 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -627,7 +627,7 @@ var defaultSysVars = []*SysVar{ {ScopeGlobal, TiDBAutoAnalyzeEndTime, DefAutoAnalyzeEndTime}, {ScopeSession, TiDBChecksumTableConcurrency, strconv.Itoa(DefChecksumTableConcurrency)}, {ScopeGlobal | ScopeSession, TiDBDistSQLScanConcurrency, strconv.Itoa(DefDistSQLScanConcurrency)}, - {ScopeGlobal | ScopeSession, TiDBOptInSubqRewriting, boolToIntStr(DefOptInSubqRewriting)}, + {ScopeGlobal | ScopeSession, TiDBOptInSubqToJoinAndAgg, boolToIntStr(DefOptInSubqToJoinAndAgg)}, {ScopeGlobal | ScopeSession, TiDBIndexJoinBatchSize, strconv.Itoa(DefIndexJoinBatchSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupSize, strconv.Itoa(DefIndexLookupSize)}, {ScopeGlobal | ScopeSession, TiDBIndexLookupConcurrency, strconv.Itoa(DefIndexLookupConcurrency)}, diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 9b3c5869c10b1..9394410e75d89 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -118,7 +118,7 @@ const ( TiDBDistSQLScanConcurrency = "tidb_distsql_scan_concurrency" // tidb_opt_insubquery_rewriting is used to enable/disable the optimizer rule of rewriting IN subquery. - TiDBOptInSubqRewriting = "tidb_opt_insubquery_rewriting" + TiDBOptInSubqToJoinAndAgg = "tidb_opt_insubq_to_join_and_agg" // tidb_index_join_batch_size is used to set the batch size of a index lookup join. // The index lookup join fetches batches of data from outer executor and constructs ranges for inner executor. @@ -198,7 +198,7 @@ const ( DefChecksumTableConcurrency = 4 DefSkipUTF8Check = false DefOptAggPushDown = false - DefOptInSubqRewriting = true + DefOptInSubqToJoinAndAgg = true DefBatchInsert = false DefBatchDelete = false DefCurretTS = 0 diff --git a/sessionctx/variable/varsutil.go b/sessionctx/variable/varsutil.go index 268401351f757..ee093bc8f2a68 100644 --- a/sessionctx/variable/varsutil.go +++ b/sessionctx/variable/varsutil.go @@ -278,7 +278,7 @@ func ValidateSetSystemVar(vars *SessionVars, name string, value string) (string, } return value, ErrWrongValueForVar.GenByArgs(name, value) case AutocommitVar, TiDBSkipUTF8Check, TiDBOptAggPushDown, - TiDBOptInSubqRewriting, TiDBEnableTablePartition, + TiDBOptInSubqToJoinAndAgg, TiDBEnableTablePartition, TiDBBatchInsert, TiDBDisableTxnAutoRetry, TiDBEnableStreaming, TiDBBatchDelete: if strings.EqualFold(value, "ON") || value == "1" || strings.EqualFold(value, "OFF") || value == "0" { From 1d5523c027956b9bc2654d7496d4f3914d3061ca Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 12 Sep 2018 17:23:41 +0800 Subject: [PATCH 14/21] fix test --- executor/aggregate_test.go | 4 ++-- executor/join_test.go | 4 ++-- sessionctx/variable/tidb_vars.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/executor/aggregate_test.go b/executor/aggregate_test.go index 51d3f15806036..0f8f77c391168 100644 --- a/executor/aggregate_test.go +++ b/executor/aggregate_test.go @@ -248,10 +248,10 @@ func (s *testSuite) TestAggregation(c *C) { tk.MustExec("create table t2 (c1 int)") tk.MustExec("insert into t1 values(3), (2)") tk.MustExec("insert into t2 values(1), (2)") - tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 0") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 0") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) - tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 1") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 1") result = tk.MustQuery("select sum(c1 in (select * from t2)) from t1") result.Check(testkit.Rows("1")) result = tk.MustQuery("select sum(c1) k from (select * from t1 union all select * from t2)t group by c1 * 2 order by k") diff --git a/executor/join_test.go b/executor/join_test.go index 03471b17703f2..4a416a3d60ae3 100644 --- a/executor/join_test.go +++ b/executor/join_test.go @@ -726,14 +726,14 @@ func (s *testSuite) TestInSubquery(c *C) { tk.MustExec("create table t2 (a int)") tk.MustExec("insert into t1 values (1),(2)") tk.MustExec("insert into t2 values (1),(2)") - tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 0") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 0") result = tk.MustQuery("select * from t1 where a in (select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") result.Check(testkit.Rows()) result = tk.MustQuery("select * from t1 where a not in (select * from t2 where false)") result.Sort().Check(testkit.Rows("1", "2")) - tk.MustExec("set @@session.tidb_opt_insubquery_rewriting = 1") + tk.MustExec("set @@session.tidb_opt_insubq_to_join_and_agg = 1") result = tk.MustQuery("select * from t1 where a in (select * from t2)") result.Sort().Check(testkit.Rows("1", "2")) result = tk.MustQuery("select * from t1 where a in (select * from t2 where false)") diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 9394410e75d89..88ac50e8a7b65 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -117,7 +117,7 @@ const ( // If the query has a LIMIT clause, high concurrency makes the system do much more work than needed. TiDBDistSQLScanConcurrency = "tidb_distsql_scan_concurrency" - // tidb_opt_insubquery_rewriting is used to enable/disable the optimizer rule of rewriting IN subquery. + // tidb_opt_insubquery_to_join_and_agg is used to enable/disable the optimizer rule of rewriting IN subquery. TiDBOptInSubqToJoinAndAgg = "tidb_opt_insubq_to_join_and_agg" // tidb_index_join_batch_size is used to set the batch size of a index lookup join. From f26b701f753ac93e4dc17ef8aeab9895860a33c6 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 8 Oct 2018 20:16:44 +0800 Subject: [PATCH 15/21] delete unused file --- plan/rule_aggregation_elimination.go | 129 --------------------------- 1 file changed, 129 deletions(-) delete mode 100644 plan/rule_aggregation_elimination.go diff --git a/plan/rule_aggregation_elimination.go b/plan/rule_aggregation_elimination.go deleted file mode 100644 index 9ba17f83903ba..0000000000000 --- a/plan/rule_aggregation_elimination.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2018 PingCAP, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// See the License for the specific language governing permissions and -// limitations under the License. - -package plan - -import ( - "github.com/pingcap/tidb/ast" - "github.com/pingcap/tidb/expression" - "github.com/pingcap/tidb/expression/aggregation" - "github.com/pingcap/tidb/mysql" - "github.com/pingcap/tidb/sessionctx" - "github.com/pingcap/tidb/types" -) - -type aggregationRecursiveEliminater struct { - aggregationEliminateChecker -} - -type aggregationEliminateChecker struct { -} - -// tryToEliminateAggregation will eliminate aggregation grouped by unique key. -// e.g. select min(b) from t group by a. If a is a unique key, then this sql is equal to `select b from t group by a`. -// For count(expr), sum(expr), avg(expr), count(distinct expr, [expr...]) we may need to rewrite the expr. Details are shown below. -// If we can eliminate agg successful, we return a projection. Else we return a nil pointer. -func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection { - schemaByGroupby := expression.NewSchema(agg.groupByCols...) - coveredByUniqueKey := false - for _, key := range agg.children[0].Schema().Keys { - if schemaByGroupby.ColumnsIndices(key) != nil { - coveredByUniqueKey = true - break - } - } - if coveredByUniqueKey { - // GroupByCols has unique key, so this aggregation can be removed. - proj := a.convertAggToProj(agg) - proj.SetChildren(agg.children[0]) - return proj - } - return nil -} - -func (a *aggregationEliminateChecker) convertAggToProj(agg *LogicalAggregation) *LogicalProjection { - proj := LogicalProjection{ - Exprs: make([]expression.Expression, 0, len(agg.AggFuncs)), - }.init(agg.ctx) - for _, fun := range agg.AggFuncs { - expr := a.rewriteExpr(agg.ctx, fun) - proj.Exprs = append(proj.Exprs, expr) - } - proj.SetSchema(agg.schema.Clone()) - return proj -} - -// rewriteExpr will rewrite the aggregate function to expression doesn't contain aggregate function. -func (a *aggregationEliminateChecker) rewriteExpr(ctx sessionctx.Context, aggFunc *aggregation.AggFuncDesc) expression.Expression { - switch aggFunc.Name { - case ast.AggFuncCount: - if aggFunc.Mode == aggregation.FinalMode { - return a.rewriteSumOrAvg(ctx, aggFunc.Args) - } - return a.rewriteCount(ctx, aggFunc.Args) - case ast.AggFuncSum, ast.AggFuncAvg: - return a.rewriteSumOrAvg(ctx, aggFunc.Args) - default: - // Default we do nothing about expr. - return aggFunc.Args[0] - } -} - -func (a *aggregationEliminateChecker) rewriteCount(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { - // If is count(expr), we will change it to if(isnull(expr), 0, 1). - // If is count(distinct x, y, z) we will change it to if(isnull(x) or isnull(y) or isnull(z), 0, 1). - isNullExprs := make([]expression.Expression, 0, len(exprs)) - for _, expr := range exprs { - isNullExpr := expression.NewFunctionInternal(ctx, ast.IsNull, types.NewFieldType(mysql.TypeTiny), expr) - isNullExprs = append(isNullExprs, isNullExpr) - } - innerExpr := expression.ComposeDNFCondition(ctx, isNullExprs...) - newExpr := expression.NewFunctionInternal(ctx, ast.If, types.NewFieldType(mysql.TypeLonglong), innerExpr, expression.Zero, expression.One) - return newExpr -} - -// See https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html -// The SUM() and AVG() functions return a DECIMAL value for exact-value arguments (integer or DECIMAL), -// and a DOUBLE value for approximate-value arguments (FLOAT or DOUBLE). -func (a *aggregationEliminateChecker) rewriteSumOrAvg(ctx sessionctx.Context, exprs []expression.Expression) expression.Expression { - // FIXME: Consider the case that avg is final mode. - expr := exprs[0] - switch expr.GetType().Tp { - // Integer type should be cast to decimal. - case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong: - return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeNewDecimal)) - // Double and Decimal doesn't need to be cast. - case mysql.TypeDouble, mysql.TypeNewDecimal: - return expr - // Float should be cast to double. And other non-numeric type should be cast to double too. - default: - return expression.BuildCastFunction(ctx, expr, types.NewFieldType(mysql.TypeDouble)) - } -} - -func (a *aggregationRecursiveEliminater) optimize(p LogicalPlan) (LogicalPlan, error) { - newChildren := make([]LogicalPlan, 0, len(p.Children())) - for _, child := range p.Children() { - newChild, _ := a.optimize(child) - newChildren = append(newChildren, newChild) - } - p.SetChildren(newChildren...) - agg, ok := p.(*LogicalAggregation) - if !ok { - return p, nil - } - if proj := a.tryToEliminateAggregation(agg); proj != nil { - return proj, nil - } - return p, nil -} From 260a9511cbabf942fd3f889e635163a446b4de9c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 8 Oct 2018 20:21:43 +0800 Subject: [PATCH 16/21] fix the wrong test --- cmd/explaintest/r/tpch.result | 36 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index bf7e62818c57c..500eb3ccf071e 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -1024,7 +1024,7 @@ id count task operator info Projection_22 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.orders.o_orderkey, tpch.orders.o_orderdate, tpch.orders.o_totalprice, 14_col_0 └─TopN_25 100.00 root tpch.orders.o_totalprice:desc, tpch.orders.o_orderdate:asc, offset:0, count:100 └─HashAgg_28 75000000.00 root group by:tpch.customer.c_custkey, tpch.customer.c_name, tpch.orders.o_orderdate, tpch.orders.o_orderkey, tpch.orders.o_totalprice, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.customer.c_custkey), firstrow(tpch.customer.c_name), firstrow(tpch.orders.o_orderkey), firstrow(tpch.orders.o_totalprice), firstrow(tpch.orders.o_orderdate) - └─HashLeftJoin_29 237008981.18 root inner join, inner:Projection_58, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] + └─HashLeftJoin_29 237008981.18 root inner join, inner:Selection_58, equal:[eq(tpch.orders.o_orderkey, tpch.lineitem.l_orderkey)] ├─IndexJoin_35 300005811.00 root inner join, inner:IndexLookUp_34, outer key:tpch.orders.o_orderkey, inner key:tpch.lineitem.l_orderkey │ ├─HashRightJoin_51 75000000.00 root inner join, inner:TableReader_55, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)] │ │ ├─TableReader_55 7500000.00 root data:TableScan_54 @@ -1034,12 +1034,11 @@ Projection_22 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.or │ └─IndexLookUp_34 1.00 root │ ├─IndexScan_32 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false │ └─TableScan_33 1.00 cop table:lineitem, keep order:false - └─Projection_58 59251097.60 root tpch.lineitem.l_orderkey - └─Selection_59 59251097.60 root gt(sel_agg_2, 314) - └─HashAgg_66 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) - └─TableReader_67 74063872.00 root data:HashAgg_60 - └─HashAgg_60 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) - └─TableScan_65 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false + └─Selection_58 59251097.60 root gt(sel_agg_2, 314) + └─HashAgg_65 74063872.00 root group by:col_2, funcs:sum(col_0), firstrow(col_1) + └─TableReader_66 74063872.00 root data:HashAgg_59 + └─HashAgg_59 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) + └─TableScan_64 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q19 Discounted Revenue Query The Discounted Revenue Query reports the gross discounted revenue attributed to the sale of selected parts handled @@ -1154,18 +1153,17 @@ Sort_26 20000.00 root tpch.supplier.s_name:asc └─Projection_45 257492.04 root tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, tpch.partsupp.ps_availqty, tpch.part.p_partkey, mul(0.5, 14_col_0) └─Selection_46 257492.04 root gt(cast(tpch.partsupp.ps_availqty), mul(0.5, 14_col_0)) └─HashAgg_49 321865.05 root group by:tpch.partsupp.ps_partkey, tpch.partsupp.ps_suppkey, funcs:firstrow(tpch.partsupp.ps_partkey), firstrow(tpch.partsupp.ps_suppkey), firstrow(tpch.partsupp.ps_availqty), firstrow(tpch.part.p_partkey), sum(tpch.lineitem.l_quantity) - └─HashLeftJoin_52 9711455.06 root left outer join, inner:TableReader_76, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] - ├─IndexJoin_57 321865.05 root inner join, inner:IndexLookUp_56, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey - │ ├─IndexLookUp_56 1.00 root - │ │ ├─IndexScan_54 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false - │ │ └─TableScan_55 1.00 cop table:partsupp, keep order:false - │ └─Projection_68 80007.93 root tpch.part.p_partkey - │ └─TableReader_71 80007.93 root data:Selection_70 - │ └─Selection_70 80007.93 cop like(tpch.part.p_name, "green%", 92) - │ └─TableScan_69 10000000.00 cop table:part, range:[-inf,+inf], keep order:false - └─TableReader_76 44189356.65 root data:Selection_75 - └─Selection_75 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) - └─TableScan_74 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false + └─HashLeftJoin_52 9711455.06 root left outer join, inner:TableReader_78, equal:[eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)] + ├─IndexJoin_61 321865.05 root inner join, inner:IndexLookUp_60, outer key:tpch.part.p_partkey, inner key:tpch.partsupp.ps_partkey + │ ├─IndexLookUp_60 1.00 root + │ │ ├─IndexScan_58 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ │ └─TableScan_59 1.00 cop table:partsupp, keep order:false + │ └─TableReader_73 80007.93 root data:Selection_72 + │ └─Selection_72 80007.93 cop like(tpch.part.p_name, "green%", 92) + │ └─TableScan_71 10000000.00 cop table:part, range:[-inf,+inf], keep order:false + └─TableReader_78 44189356.65 root data:Selection_77 + └─Selection_77 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) + └─TableScan_76 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false /* Q21 Suppliers Who Kept Orders Waiting Query This query identifies certain suppliers who were not able to ship required parts in a timely manner. From 091b5b70e782a3ea6a5f98e128ed126ffbde1c96 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 22 Oct 2018 19:39:35 +0800 Subject: [PATCH 17/21] fix panic in test. --- planner/core/logical_plan_builder.go | 1 + 1 file changed, 1 insertion(+) diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index c8a1259f55ba7..18bc2eefca5aa 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -77,6 +77,7 @@ func (b *PlanBuilder) buildAggregation(p LogicalPlan, aggFuncList []*ast.Aggrega // when we eliminate the max and min we may add `is not null` filter. b.optFlag = b.optFlag | flagPredicatePushDown b.optFlag = b.optFlag | flagEliminateAgg + b.optFlag = b.optFlag | flagEliminateProjection2 plan4Agg := LogicalAggregation{AggFuncs: make([]*aggregation.AggFuncDesc, 0, len(aggFuncList))}.init(b.ctx) schema4Agg := expression.NewSchema(make([]*expression.Column, 0, len(aggFuncList)+p.Schema().Len())...) From dd93228e49410d6936956eeabe7f517bba9d3ba9 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 25 Oct 2018 19:33:39 +0800 Subject: [PATCH 18/21] address comment --- planner/core/cbo_test.go | 2 +- planner/core/expression_rewriter.go | 2 +- planner/core/logical_plan_builder.go | 2 +- planner/core/logical_plan_test.go | 2 +- planner/core/optimizer.go | 4 +--- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 6a3f123e2fb9f..1bb6d3f9e1ce6 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -658,7 +658,7 @@ func (s *testAnalyzeSuite) TestCorrelatedEstimation(c *C) { tk.MustQuery("explain select t.c in (select count(*) from t s , t t1 where s.a = t.a and s.a = t1.a) from t;"). Check(testkit.Rows( "Projection_11 10.00 root 9_aux_0", - "└─Apply_13 10.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, count(*))]", + "└─Apply_13 10.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, 7_col_0)]", " ├─TableReader_15 10.00 root data:TableScan_14", " │ └─TableScan_14 10.00 cop table:t, range:[-inf,+inf], keep order:false", " └─StreamAgg_20 1.00 root funcs:count(1)", diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 2e6d36fcd22b9..ea98981355bf3 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -643,7 +643,7 @@ func (er *expressionRewriter) handleInSubquery(v *ast.PatternInExpr) (ast.Node, if er.ctx.GetSessionVars().AllowInSubqToJoinAndAgg && !v.Not && !asScalar && len(np.extractCorrelatedCols()) == 0 { // We need to try to eliminate the agg and the projection produced by this operation. er.b.optFlag |= flagEliminateAgg - er.b.optFlag |= flagEliminateProjection2 + er.b.optFlag |= flagEliminateProjection // Build distinct for the inner query. agg := er.b.buildDistinct(np, np.Schema().Len()) for _, col := range agg.schema.Columns { diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 18bc2eefca5aa..0b1d6f2116d9f 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -77,7 +77,7 @@ func (b *PlanBuilder) buildAggregation(p LogicalPlan, aggFuncList []*ast.Aggrega // when we eliminate the max and min we may add `is not null` filter. b.optFlag = b.optFlag | flagPredicatePushDown b.optFlag = b.optFlag | flagEliminateAgg - b.optFlag = b.optFlag | flagEliminateProjection2 + b.optFlag = b.optFlag | flagEliminateProjection plan4Agg := LogicalAggregation{AggFuncs: make([]*aggregation.AggFuncDesc, 0, len(aggFuncList))}.init(b.ctx) schema4Agg := expression.NewSchema(make([]*expression.Column, 0, len(aggFuncList)+p.Schema().Len())...) diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 53562eaa492b2..7f115234ff213 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1558,7 +1558,7 @@ func (s *testPlanSuite) TestAggPrune(c *C) { p, err := BuildLogicalPlan(s.ctx, stmt, s.is) c.Assert(err, IsNil) - p, err = logicalOptimize(flagPredicatePushDown|flagPrunColumns|flagBuildKeyInfo|flagEliminateAgg|flagEliminateProjection|flagEliminateProjection2, p.(LogicalPlan)) + p, err = logicalOptimize(flagPredicatePushDown|flagPrunColumns|flagBuildKeyInfo|flagEliminateAgg|flagEliminateProjection, p.(LogicalPlan)) c.Assert(err, IsNil) c.Assert(ToString(p), Equals, tt.best, comment) } diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go index a0551c9bca40f..33ff0d44e1832 100644 --- a/planner/core/optimizer.go +++ b/planner/core/optimizer.go @@ -33,11 +33,10 @@ var AllowCartesianProduct = true const ( flagPrunColumns uint64 = 1 << iota - flagEliminateProjection flagBuildKeyInfo flagDecorrelate flagEliminateAgg - flagEliminateProjection2 + flagEliminateProjection flagMaxMinEliminate flagPredicatePushDown flagPartitionProcessor @@ -47,7 +46,6 @@ const ( var optRuleList = []logicalOptRule{ &columnPruner{}, - &projectionEliminater{}, &buildKeySolver{}, &decorrelateSolver{}, &aggregationEliminator{}, From d93382617d21472952eb11e74273ab2eefc82da8 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 25 Oct 2018 19:35:36 +0800 Subject: [PATCH 19/21] fix explain_test --- cmd/explaintest/r/explain_easy.result | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index d146bb3ea15f4..3b369cd2f7e3b 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -274,7 +274,7 @@ create table t(a int primary key, b int, c int, index idx(b)); explain select t.c in (select count(*) from t s ignore index(idx), t t1 where s.a = t.a and s.a = t1.a) from t; id count task operator info Projection_11 10000.00 root 9_aux_0 -└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, count(*))] +└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, 7_col_0)] ├─TableReader_15 10000.00 root data:TableScan_14 │ └─TableScan_14 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─StreamAgg_20 1.00 root funcs:count(1) @@ -287,7 +287,7 @@ Projection_11 10000.00 root 9_aux_0 explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.a = t1.a) from t; id count task operator info Projection_11 10000.00 root 9_aux_0 -└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, count(*))] +└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, 7_col_0)] ├─TableReader_15 10000.00 root data:TableScan_14 │ └─TableScan_14 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─StreamAgg_20 1.00 root funcs:count(1) @@ -299,7 +299,7 @@ Projection_11 10000.00 root 9_aux_0 explain select t.c in (select count(*) from t s use index(idx), t t1 where s.b = t.a and s.c = t1.a) from t; id count task operator info Projection_11 10000.00 root 9_aux_0 -└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, count(*))] +└─Apply_13 10000.00 root left outer semi join, inner:StreamAgg_20, equal:[eq(test.t.c, 7_col_0)] ├─TableReader_15 10000.00 root data:TableScan_14 │ └─TableScan_14 10000.00 cop table:t, range:[-inf,+inf], keep order:false, stats:pseudo └─StreamAgg_20 1.00 root funcs:count(1) From e788281ac1173d914a108705929431f247cb96d3 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 30 Oct 2018 18:40:05 +0800 Subject: [PATCH 20/21] add it back --- cmd/explaintest/r/explain_easy.result | 1 + cmd/explaintest/r/explain_easy_stats.result | 1 + cmd/explaintest/t/explain_easy.test | 2 ++ cmd/explaintest/t/explain_easy_stats.test | 2 ++ 4 files changed, 6 insertions(+) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index fc3d428f1097d..2cca4bb3b0b67 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -186,6 +186,7 @@ HashAgg_18 24000.00 root group by:t2.c1, funcs:firstrow(join_agg_0) └─IndexReader_67 8000.00 root index:StreamAgg_57 └─StreamAgg_57 8000.00 cop group by:test.t2.c1, funcs:firstrow(test.t2.c1), firstrow(test.t2.c1) └─IndexScan_65 10000.00 cop table:t2, index:c1, range:[NULL,+inf], keep order:true, stats:pseudo +set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select sum(t1.c1 in (select c1 from t2)) from t1; id count task operator info StreamAgg_12 1.00 root funcs:sum(5_aux_0) diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index d064981a222e6..08eef50780520 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -119,6 +119,7 @@ Limit_10 1.00 root offset:0, count:1 └─TableReader_21 1.00 root data:Limit_20 └─Limit_20 1.00 cop offset:0, count:1 └─TableScan_18 1.00 cop table:t1, range:[-inf,+inf], keep order:true, desc +set @@session.tidb_opt_insubq_to_join_and_agg=0; explain select 1 in (select c2 from t2) from t1; id count task operator info Projection_6 1999.00 root 5_aux_0 diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index 2ab0e166e2387..59047bfdd7062 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -34,6 +34,8 @@ explain select if(10, t1.c1, t1.c2) from t1; explain select c1 from t2 union select c1 from t2 union all select c1 from t2; explain select c1 from t2 union all select c1 from t2 union select c1 from t2; +set @@session.tidb_opt_insubq_to_join_and_agg=0; + explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; explain select sum(6 in (select c2 from t2)) from t1; diff --git a/cmd/explaintest/t/explain_easy_stats.test b/cmd/explaintest/t/explain_easy_stats.test index 731b1a6b65635..09c980a52b27e 100644 --- a/cmd/explaintest/t/explain_easy_stats.test +++ b/cmd/explaintest/t/explain_easy_stats.test @@ -37,6 +37,8 @@ explain select * from information_schema.columns; explain select c2 = (select c2 from t2 where t1.c1 = t2.c1 order by c1 limit 1) from t1; explain select * from t1 order by c1 desc limit 1; +set @@session.tidb_opt_insubq_to_join_and_agg=0; + # explain select sum(t1.c1 in (select c1 from t2)) from t1; explain select 1 in (select c2 from t2) from t1; # explain select sum(6 in (select c2 from t2)) from t1; From fe1370cf03381c443743575d4ee0a2e75f3a2be4 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 30 Oct 2018 18:59:15 +0800 Subject: [PATCH 21/21] fix explain test --- cmd/explaintest/r/explain_easy.result | 2 ++ cmd/explaintest/r/explain_easy_stats.result | 2 ++ cmd/explaintest/t/explain_easy.test | 2 ++ cmd/explaintest/t/explain_easy_stats.test | 2 ++ 4 files changed, 8 insertions(+) diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 2cca4bb3b0b67..8a99f3bde0c30 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -6,6 +6,7 @@ insert into t2 values(1, 0), (2, 1); create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info Projection_12 8000.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d @@ -452,3 +453,4 @@ Projection_4 2666.67 root test.t.a └─Selection_6 2666.67 cop gt(test.t.a, 0) └─TableScan_5 3333.33 cop table:t, range:(0,+inf], keep order:false, stats:pseudo drop table if exists t; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index 08eef50780520..18b351916ff89 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -9,6 +9,7 @@ load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); id count task operator info Projection_12 1600.00 root test.t3.a, test.t3.b, test.t3.c, test.t3.d @@ -190,3 +191,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085 id count task operator info Point_Get_1 1.00 root table:index_prune, index:a b drop table if exists t1, t2, t3, index_prune; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/t/explain_easy.test b/cmd/explaintest/t/explain_easy.test index 59047bfdd7062..c8fcf28f94cff 100644 --- a/cmd/explaintest/t/explain_easy.test +++ b/cmd/explaintest/t/explain_easy.test @@ -7,6 +7,7 @@ create table t3 (a bigint, b bigint, c bigint, d bigint); create table t4 (a int, b int, c int, index idx(a, b), primary key(a)); set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); explain select * from t1; @@ -99,3 +100,4 @@ explain select * from t where _tidb_rowid > 0; explain select a, _tidb_rowid from t where a > 0; explain select * from t where _tidb_rowid > 0 and a > 0; drop table if exists t; +set @@session.tidb_opt_insubq_to_join_and_agg=1; diff --git a/cmd/explaintest/t/explain_easy_stats.test b/cmd/explaintest/t/explain_easy_stats.test index 09c980a52b27e..fc9cee2f9c5bc 100644 --- a/cmd/explaintest/t/explain_easy_stats.test +++ b/cmd/explaintest/t/explain_easy_stats.test @@ -9,6 +9,7 @@ load stats 's/explain_easy_stats_t3.json'; create table index_prune(a bigint(20) NOT NULL, b bigint(20) NOT NULL, c tinyint(4) NOT NULL, primary key(a, b), index idx_b_c_a(b, c, a)); load stats 's/explain_easy_stats_index_prune.json'; set @@session.tidb_opt_agg_push_down = 1; +set @@session.tidb_opt_insubq_to_join_and_agg=1; explain select * from t3 where exists (select s.a from t3 s having sum(s.a) = t3.a ); @@ -56,3 +57,4 @@ explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085 explain select * from index_prune WHERE a = 1010010404050976781 AND b = 26467085526790 GROUP BY b ORDER BY a limit 1; drop table if exists t1, t2, t3, index_prune; +set @@session.tidb_opt_insubq_to_join_and_agg=1;