From 045dba6d4cac60a8b7951ff76aa9d04e68b54bce Mon Sep 17 00:00:00 2001 From: "Zhuomin(Charming) Liu" Date: Wed, 4 Mar 2020 19:57:17 +0800 Subject: [PATCH 1/5] planner: not to generate partial agg when cop task has root conditions (#15112) --- planner/core/integration_test.go | 35 ++++++++++++++++++++++++++++++++ planner/core/task.go | 31 +++++++++++++++------------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 0d713339575fd..4fe6e59ec5596 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -301,6 +301,41 @@ func (s *testIntegrationSuite) TestSelPushDownTiFlash(c *C) { )) } +func (s *testIntegrationSuite) TestIssue15110(c *C) { + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists crm_rd_150m") + tk.MustExec(`CREATE TABLE crm_rd_150m ( + product varchar(256) DEFAULT NULL, + uks varchar(16) DEFAULT NULL, + brand varchar(256) DEFAULT NULL, + cin varchar(16) DEFAULT NULL, + created_date timestamp NULL DEFAULT NULL, + quantity int(11) DEFAULT NULL, + amount decimal(11,0) DEFAULT NULL, + pl_date timestamp NULL DEFAULT NULL, + customer_first_date timestamp NULL DEFAULT NULL, + recent_date timestamp NULL DEFAULT NULL + ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;`) + + // Create virtual tiflash replica info. + dom := domain.GetDomain(tk.Se) + is := dom.InfoSchema() + db, exists := is.SchemaByName(model.NewCIStr("test")) + c.Assert(exists, IsTrue) + for _, tblInfo := range db.Tables { + if tblInfo.Name.L == "crm_rd_150m" { + tblInfo.TiFlashReplica = &model.TiFlashReplicaInfo{ + Count: 1, + Available: true, + } + } + } + + tk.MustExec("set @@session.tidb_isolation_read_engines = 'tiflash'") + tk.MustExec("explain SELECT count(*) FROM crm_rd_150m dataset_48 WHERE (CASE WHEN (month(dataset_48.customer_first_date)) <= 30 THEN '新客' ELSE NULL END) IS NOT NULL;") +} + func (s *testIntegrationSuite) TestReadFromStorageHint(c *C) { tk := testkit.NewTestKit(c, s.store) diff --git a/planner/core/task.go b/planner/core/task.go index 3acc816956d2f..e25f74116d5cf 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -522,7 +522,7 @@ func (p *PhysicalLimit) attach2Task(tasks ...task) task { if cop, ok := t.(*copTask); ok { // For double read which requires order being kept, the limit cannot be pushed down to the table side, // because handles would be reordered before being sent to table scan. - if !cop.keepOrder || !cop.indexPlanFinished || cop.indexPlan == nil { + if (!cop.keepOrder || !cop.indexPlanFinished || cop.indexPlan == nil) && len(cop.rootTaskConds) == 0 { // When limit is pushed down, we should remove its offset. newCount := p.Offset + p.Count childProfile := cop.plan().statsInfo() @@ -654,7 +654,7 @@ func (p *PhysicalTopN) getPushedDownTopN(childPlan PhysicalPlan) *PhysicalTopN { func (p *PhysicalTopN) attach2Task(tasks ...task) task { t := tasks[0].copy() inputCount := t.count() - if copTask, ok := t.(*copTask); ok && p.canPushDown() { + if copTask, ok := t.(*copTask); ok && p.canPushDown() && len(copTask.rootTaskConds) == 0 { // If all columns in topN are from index plan, we push it to index plan, otherwise we finish the index plan and // push it to table plan. var pushedDownTopN *PhysicalTopN @@ -824,7 +824,7 @@ func (p *PhysicalStreamAgg) attach2Task(tasks ...task) task { // The `doubleReadNeedProj` is always set if the double read needs to keep order. 
So we just use it to decided // whether the following plan is double read with order reserved. copToFlash := isFlashCopTask(cop) - if !cop.doubleReadNeedProj { + if !cop.doubleReadNeedProj || len(cop.rootTaskConds) == 0{ partialAgg, finalAgg := p.newPartialAggregate(copToFlash) if partialAgg != nil { if cop.tablePlan != nil { @@ -904,7 +904,7 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { // copToFlash means whether the cop task is running on flash storage copToFlash := isFlashCopTask(cop) partialAgg, finalAgg := p.newPartialAggregate(copToFlash) - if partialAgg != nil { + if partialAgg != nil || len(cop.rootTaskConds) == 0 { if cop.tablePlan != nil { cop.finishIndexPlan() partialAgg.SetChildren(cop.tablePlan) @@ -913,17 +913,20 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { partialAgg.SetChildren(cop.indexPlan) cop.indexPlan = partialAgg } - cop.addCost(p.GetCost(inputRows, false)) + // In `newPartialAggregate`, we are using stats of final aggregation as stats + // of `partialAgg`, so the network cost of transferring result rows of `partialAgg` + // to TiDB is normally under-estimated for hash aggregation, since the group-by + // column may be independent of the column used for region distribution, so a closer + // estimation of network cost for hash aggregation may multiply the number of + // regions involved in the `partialAgg`, which is unknown however. + t = finishCopTask(p.ctx, cop) + inputRows = t.count() + attachPlan2Task(finalAgg, t) + } else { + t = finishCopTask(p.ctx, cop) + inputRows = t.count() + attachPlan2Task(p, t) } - // In `newPartialAggregate`, we are using stats of final aggregation as stats - // of `partialAgg`, so the network cost of transferring result rows of `partialAgg` - // to TiDB is normally under-estimated for hash aggregation, since the group-by - // column may be independent of the column used for region distribution, so a closer - // estimation of network cost for hash aggregation may multiply the number of - // regions involved in the `partialAgg`, which is unknown however. - t = finishCopTask(p.ctx, cop) - inputRows = t.count() - attachPlan2Task(finalAgg, t) } else { attachPlan2Task(p, t) } From 9d17231dc07ea7b7f8da7598a9e21c2ad92b2b20 Mon Sep 17 00:00:00 2001 From: lzmhhh123 Date: Thu, 5 Mar 2020 10:23:43 +0800 Subject: [PATCH 2/5] fix fmt --- planner/core/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/task.go b/planner/core/task.go index e25f74116d5cf..549a97cb9aee9 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -824,7 +824,7 @@ func (p *PhysicalStreamAgg) attach2Task(tasks ...task) task { // The `doubleReadNeedProj` is always set if the double read needs to keep order. So we just use it to decided // whether the following plan is double read with order reserved. 
copToFlash := isFlashCopTask(cop) - if !cop.doubleReadNeedProj || len(cop.rootTaskConds) == 0{ + if !cop.doubleReadNeedProj || len(cop.rootTaskConds) == 0 { partialAgg, finalAgg := p.newPartialAggregate(copToFlash) if partialAgg != nil { if cop.tablePlan != nil { From 05308b2e3beb0b7b994c75533a5e0a80fd0daefe Mon Sep 17 00:00:00 2001 From: lzmhhh123 Date: Thu, 5 Mar 2020 11:15:17 +0800 Subject: [PATCH 3/5] fix --- planner/core/task.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/planner/core/task.go b/planner/core/task.go index 549a97cb9aee9..cd72531a8c12a 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -904,7 +904,7 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { // copToFlash means whether the cop task is running on flash storage copToFlash := isFlashCopTask(cop) partialAgg, finalAgg := p.newPartialAggregate(copToFlash) - if partialAgg != nil || len(cop.rootTaskConds) == 0 { + if partialAgg != nil && len(cop.rootTaskConds) == 0 { if cop.tablePlan != nil { cop.finishIndexPlan() partialAgg.SetChildren(cop.tablePlan) @@ -913,6 +913,7 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { partialAgg.SetChildren(cop.indexPlan) cop.indexPlan = partialAgg } + cop.addCost(p.GetCost(inputRows, false)) // In `newPartialAggregate`, we are using stats of final aggregation as stats // of `partialAgg`, so the network cost of transferring result rows of `partialAgg` // to TiDB is normally under-estimated for hash aggregation, since the group-by @@ -924,7 +925,6 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { attachPlan2Task(finalAgg, t) } else { t = finishCopTask(p.ctx, cop) - inputRows = t.count() attachPlan2Task(p, t) } } else { From b3f0a891d5fee1e10f839bc531645dcfd0c94d00 Mon Sep 17 00:00:00 2001 From: lzmhhh123 Date: Thu, 5 Mar 2020 14:52:08 +0800 Subject: [PATCH 4/5] fix --- planner/core/task.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/planner/core/task.go b/planner/core/task.go index cd72531a8c12a..edb5da3963c4a 100644 --- a/planner/core/task.go +++ b/planner/core/task.go @@ -824,7 +824,7 @@ func (p *PhysicalStreamAgg) attach2Task(tasks ...task) task { // The `doubleReadNeedProj` is always set if the double read needs to keep order. So we just use it to decided // whether the following plan is double read with order reserved. 
copToFlash := isFlashCopTask(cop) - if !cop.doubleReadNeedProj || len(cop.rootTaskConds) == 0 { + if !cop.doubleReadNeedProj && len(cop.rootTaskConds) == 0 { partialAgg, finalAgg := p.newPartialAggregate(copToFlash) if partialAgg != nil { if cop.tablePlan != nil { @@ -901,19 +901,21 @@ func (p *PhysicalHashAgg) attach2Task(tasks ...task) task { t := tasks[0].copy() inputRows := t.count() if cop, ok := t.(*copTask); ok { - // copToFlash means whether the cop task is running on flash storage - copToFlash := isFlashCopTask(cop) - partialAgg, finalAgg := p.newPartialAggregate(copToFlash) - if partialAgg != nil && len(cop.rootTaskConds) == 0 { - if cop.tablePlan != nil { - cop.finishIndexPlan() - partialAgg.SetChildren(cop.tablePlan) - cop.tablePlan = partialAgg - } else { - partialAgg.SetChildren(cop.indexPlan) - cop.indexPlan = partialAgg + if len(cop.rootTaskConds) == 0 { + // copToFlash means whether the cop task is running on flash storage + copToFlash := isFlashCopTask(cop) + partialAgg, finalAgg := p.newPartialAggregate(copToFlash) + if partialAgg != nil { + if cop.tablePlan != nil { + cop.finishIndexPlan() + partialAgg.SetChildren(cop.tablePlan) + cop.tablePlan = partialAgg + } else { + partialAgg.SetChildren(cop.indexPlan) + cop.indexPlan = partialAgg + } + cop.addCost(p.GetCost(inputRows, false)) } - cop.addCost(p.GetCost(inputRows, false)) // In `newPartialAggregate`, we are using stats of final aggregation as stats // of `partialAgg`, so the network cost of transferring result rows of `partialAgg` // to TiDB is normally under-estimated for hash aggregation, since the group-by From d39c00175853b47976146c73894d3ca32e4b5f71 Mon Sep 17 00:00:00 2001 From: lzmhhh123 Date: Mon, 9 Mar 2020 14:53:35 +0800 Subject: [PATCH 5/5] fix --- planner/core/planbuilder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index b1cfa2ee4d1d1..b12f6e7f237ae 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -612,7 +612,7 @@ func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tblInf func (b *PlanBuilder) filterPathByIsolationRead(paths []*accessPath, dbName model.CIStr) ([]*accessPath, error) { // TODO: filter paths with isolation read locations. - if dbName.L == mysql.SystemDB { + if dbName.L == mysql.SystemDB || dbName.L == "information_schema" || dbName.L == "performance_schema" { return paths, nil } cfgIsolationEngines := set.StringSet{}
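
The shape of the logic that patches 1-4 converge on is easier to see outside the planner, so here is a minimal sketch with simplified stand-in types (plan, copTask and attachHashAgg below are invented for illustration and are not TiDB's real API): the partial/final aggregation split is only generated when the cop task carries no rootTaskConds; otherwise the cop task is finished first, the root-level selection runs in TiDB, and the whole aggregation is attached above it. This is the situation TestIssue15110 exercises — the CASE ... IS NOT NULL filter cannot be pushed down, so it ends up in rootTaskConds and the aggregation must not be split.

package main

import "fmt"

// Simplified stand-ins for the planner's structures; the real copTask and
// PhysicalHashAgg in planner/core carry far more state than this.
type plan struct {
	name     string
	children []*plan
}

type copTask struct {
	tablePlan     *plan
	rootTaskConds []string // filters that can only be evaluated in TiDB, not in the coprocessor
}

// attachHashAgg only splits the aggregation into a coprocessor-side partial
// and a TiDB-side final aggregation when the cop task has no root conditions,
// because those conditions must filter rows before any aggregation runs.
func attachHashAgg(cop *copTask) *plan {
	if len(cop.rootTaskConds) == 0 {
		partial := &plan{name: "HashAgg(partial)", children: []*plan{cop.tablePlan}}
		reader := &plan{name: "TableReader", children: []*plan{partial}}
		return &plan{name: "HashAgg(final)", children: []*plan{reader}}
	}
	// Root conditions present: finish the cop task, evaluate the conditions
	// in TiDB, and keep the aggregation whole on top of them.
	reader := &plan{name: "TableReader", children: []*plan{cop.tablePlan}}
	sel := &plan{name: "Selection(rootTaskConds)", children: []*plan{reader}}
	return &plan{name: "HashAgg", children: []*plan{sel}}
}

func main() {
	scan := &plan{name: "TableFullScan"}

	pushed := attachHashAgg(&copTask{tablePlan: scan})
	fmt.Println(pushed.name) // HashAgg(final), with HashAgg(partial) below the TableReader

	blocked := attachHashAgg(&copTask{
		tablePlan:     scan,
		rootTaskConds: []string{"(case when month(customer_first_date) <= 30 then 1 end) is not null"},
	})
	fmt.Println(blocked.name) // HashAgg, kept whole above Selection(rootTaskConds)
}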
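
Patch 5 exempts the system schemas from isolation-read filtering: tables in mysql, information_schema and performance_schema are not replicated to TiFlash, so with tidb_isolation_read_engines='tiflash' they could otherwise be left with no usable access path. A minimal sketch of that exemption follows, again with simplified types — accessPath here is a stand-in and filterPathsByEngine is an invented name, not the real filterPathByIsolationRead signature.

package main

import "fmt"

// accessPath is a stand-in for the planner's access path; only the store type matters here.
type accessPath struct {
	storeType string // "tikv" or "tiflash"
}

// filterPathsByEngine mirrors the shape of filterPathByIsolationRead after patch 5:
// system schemas bypass the filter entirely, all other databases keep only the
// paths whose engine is allowed by the isolation-read setting.
func filterPathsByEngine(paths []accessPath, dbName string, allowed map[string]bool) []accessPath {
	switch dbName {
	case "mysql", "information_schema", "performance_schema":
		return paths // system tables have no TiFlash replica, never filter them
	}
	var kept []accessPath
	for _, p := range paths {
		if allowed[p.storeType] {
			kept = append(kept, p)
		}
	}
	return kept
}

func main() {
	paths := []accessPath{{storeType: "tikv"}, {storeType: "tiflash"}}
	onlyTiFlash := map[string]bool{"tiflash": true}

	fmt.Println(len(filterPathsByEngine(paths, "test", onlyTiFlash)))               // 1
	fmt.Println(len(filterPathsByEngine(paths, "information_schema", onlyTiFlash))) // 2
}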