diff --git a/cmd/explaintest/r/generated_columns.result b/cmd/explaintest/r/generated_columns.result index 0add5d3921876..33865a38aeae5 100644 --- a/cmd/explaintest/r/generated_columns.result +++ b/cmd/explaintest/r/generated_columns.result @@ -136,3 +136,42 @@ Union_13 23263.33 root └─TableReader_34 3323.33 root data:Selection_33 └─Selection_33 3323.33 cop lt(test.sgc3.a, 7) └─TableScan_32 10000.00 cop table:sgc3, partition:max, range:[-inf,+inf], keep order:false, stats:pseudo +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(a INT, b INT AS (a+1) VIRTUAL, c INT AS (b+1) VIRTUAL, d INT AS (c+1) VIRTUAL, KEY(b), INDEX IDX(c, d)); +INSERT INTO t1 (a) VALUES (0); +EXPLAIN SELECT b FROM t1 WHERE b=1; +id count task operator info +IndexReader_6 10.00 root index:IndexScan_5 +└─IndexScan_5 10.00 cop table:t1, index:b, range:[1,1], keep order:false, stats:pseudo +EXPLAIN SELECT b, c, d FROM t1 WHERE b=1; +id count task operator info +Projection_11 10.00 root cast(plus(test.t1.a, 1)), cast(plus(cast(plus(test.t1.a, 1)), 1)), cast(plus(cast(plus(cast(plus(test.t1.a, 1)), 1)), 1)) +└─IndexLookUp_12 10.00 root + ├─IndexScan_9 10.00 cop table:t1, index:b, range:[1,1], keep order:false, stats:pseudo + └─TableScan_10 10.00 cop table:t1, keep order:false, stats:pseudo +EXPLAIN SELECT * FROM t1 WHERE b=1; +id count task operator info +Projection_11 10.00 root test.t1.a, cast(plus(test.t1.a, 1)), cast(plus(cast(plus(test.t1.a, 1)), 1)), cast(plus(cast(plus(cast(plus(test.t1.a, 1)), 1)), 1)) +└─IndexLookUp_12 10.00 root + ├─IndexScan_9 10.00 cop table:t1, index:b, range:[1,1], keep order:false, stats:pseudo + └─TableScan_10 10.00 cop table:t1, keep order:false, stats:pseudo +EXPLAIN SELECT c FROM t1 WHERE c=2 AND d=3; +id count task operator info +Projection_4 0.10 root test.t1.c +└─IndexReader_6 0.10 root index:IndexScan_5 + └─IndexScan_5 0.10 cop table:t1, index:c, d, range:[2 3,2 3], keep order:false, stats:pseudo +DROP TABLE IF EXISTS person; +CREATE TABLE person ( +id INT NOT NULL 
AUTO_INCREMENT PRIMARY KEY, +name VARCHAR(255) NOT NULL, +address_info JSON, +city_no INT AS (JSON_EXTRACT(address_info, '$.city_no')) VIRTUAL, +KEY(city_no)); +INSERT INTO person (name, address_info) VALUES ("John", CAST('{"city_no": 1}' AS JSON)); +EXPLAIN SELECT name FROM person where city_no=1; +id count task operator info +Projection_4 10.00 root test.person.name +└─Projection_11 10.00 root test.person.name, cast(json_extract(test.person.address_info, "$.city_no")) + └─IndexLookUp_12 10.00 root + ├─IndexScan_9 10.00 cop table:person, index:city_no, range:[1,1], keep order:false, stats:pseudo + └─TableScan_10 10.00 cop table:person, keep order:false, stats:pseudo diff --git a/cmd/explaintest/t/generated_columns.test b/cmd/explaintest/t/generated_columns.test index 5d783c1f4daac..61c6cae64a373 100644 --- a/cmd/explaintest/t/generated_columns.test +++ b/cmd/explaintest/t/generated_columns.test @@ -90,3 +90,24 @@ PARTITION max VALUES LESS THAN MAXVALUE); EXPLAIN SELECT * FROM sgc3 WHERE a <= 1; EXPLAIN SELECT * FROM sgc3 WHERE a < 7; +-- Virtual generated columns as indices + +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(a INT, b INT AS (a+1) VIRTUAL, c INT AS (b+1) VIRTUAL, d INT AS (c+1) VIRTUAL, KEY(b), INDEX IDX(c, d)); +INSERT INTO t1 (a) VALUES (0); + +EXPLAIN SELECT b FROM t1 WHERE b=1; +EXPLAIN SELECT b, c, d FROM t1 WHERE b=1; +EXPLAIN SELECT * FROM t1 WHERE b=1; +EXPLAIN SELECT c FROM t1 WHERE c=2 AND d=3; + +DROP TABLE IF EXISTS person; +CREATE TABLE person ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +name VARCHAR(255) NOT NULL, +address_info JSON, +city_no INT AS (JSON_EXTRACT(address_info, '$.city_no')) VIRTUAL, +KEY(city_no)); + +INSERT INTO person (name, address_info) VALUES ("John", CAST('{"city_no": 1}' AS JSON)); +EXPLAIN SELECT name FROM person where city_no=1; diff --git a/executor/builder.go b/executor/builder.go index ebd8295088404..85fdf930ec86b 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -1946,7 +1946,12 @@ type 
dataReaderBuilder struct { func (builder *dataReaderBuilder) buildExecutorForIndexJoin(ctx context.Context, lookUpContents []*indexJoinLookUpContent, IndexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { - switch v := builder.Plan.(type) { + return builder.buildReaderForIndexJoin(ctx, builder.Plan, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) +} + +func (builder *dataReaderBuilder) buildReaderForIndexJoin(ctx context.Context, p plannercore.Plan, + lookUpContents []*indexJoinLookUpContent, IndexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { + switch v := p.(type) { case *plannercore.PhysicalTableReader: return builder.buildTableReaderForIndexJoin(ctx, v, lookUpContents) case *plannercore.PhysicalIndexReader: @@ -1955,10 +1960,31 @@ func (builder *dataReaderBuilder) buildExecutorForIndexJoin(ctx context.Context, return builder.buildIndexLookUpReaderForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) case *plannercore.PhysicalUnionScan: return builder.buildUnionScanForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) + case *plannercore.PhysicalProjection: + return builder.buildProjectionForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) } + return nil, errors.New("Wrong plan type for dataReaderBuilder") } +func (builder *dataReaderBuilder) buildProjectionForIndexJoin(ctx context.Context, v *plannercore.PhysicalProjection, + lookUpContents []*indexJoinLookUpContent, IndexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { + childExec, err := builder.buildReaderForIndexJoin(ctx, v.Children()[0], lookUpContents, IndexRanges, keyOff2IdxOff, cwc) + if err != nil { + return nil, err + } + e := &ProjectionExec{ + baseExecutor: newBaseExecutor(builder.ctx, v.Schema(), v.ExplainID(), childExec), + numWorkers: 0, // always run in un-parallel mode to avoid 
too many number of goroutines. + evaluatorSuit: expression.NewEvaluatorSuite(v.Exprs, v.AvoidColumnEvaluator), + calculateNoDelay: v.CalculateNoDelay, + } + if err := e.open(ctx); err != nil { + return nil, err + } + return e, nil +} + func (builder *dataReaderBuilder) buildUnionScanForIndexJoin(ctx context.Context, v *plannercore.PhysicalUnionScan, values []*indexJoinLookUpContent, indexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { childBuilder := &dataReaderBuilder{Plan: v.Children()[0], executorBuilder: builder.executorBuilder} diff --git a/executor/projection.go b/executor/projection.go index be6aafeeb1b42..225e9b1205778 100644 --- a/executor/projection.go +++ b/executor/projection.go @@ -78,6 +78,10 @@ func (e *ProjectionExec) Open(ctx context.Context) error { return err } + return e.open(ctx) +} + +func (e *ProjectionExec) open(ctx context.Context) error { e.prepared = false e.parentReqRows = int64(e.maxChunkSize) diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 69f8fbe284ece..384742de53feb 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -959,32 +959,79 @@ func (s *testAnalyzeSuite) TestIssue9805(c *C) { // Expected output is like: // - // +--------------------------------+----------+------+----------------------------------------------------------------------------------+----------------------------------+ - // | id | count | task | operator info | execution info | - // +--------------------------------+----------+------+----------------------------------------------------------------------------------+----------------------------------+ - // | Projection_9 | 10.00 | root | test.t1.id, test.t2.a | time:203.355µs, loops:1, rows:0 | - // | └─IndexJoin_13 | 10.00 | root | inner join, inner:IndexLookUp_12, outer key:test.t1.a, inner key:test.t2.d | time:199.633µs, loops:1, rows:0 | - // | ├─Projection_16 | 8.00 | root | test.t1.id, test.t1.a, test.t1.b, 
cast(mod(test.t1.a, 30)) | time:164.587µs, loops:1, rows:0 | - // | │ └─Selection_17 | 8.00 | root | eq(cast(mod(test.t1.a, 30)), 4) | time:157.768µs, loops:1, rows:0 | - // | │ └─TableReader_20 | 10.00 | root | data:Selection_19 | time:154.61µs, loops:1, rows:0 | - // | │ └─Selection_19 | 10.00 | cop | eq(test.t1.b, "t2") | time:28.824µs, loops:1, rows:0 | - // | │ └─TableScan_18 | 10000.00 | cop | table:t1, range:[-inf,+inf], keep order:false, stats:pseudo | time:27.654µs, loops:1, rows:0 | - // | └─IndexLookUp_12 | 10.00 | root | | time:0ns, loops:0, rows:0 | - // | ├─IndexScan_10 | 10.00 | cop | table:t2, index:d, range: decided by [test.t1.a], keep order:false, stats:pseudo | time:0ns, loops:0, rows:0 | - // | └─TableScan_11 | 10.00 | cop | table:t2, keep order:false, stats:pseudo | time:0ns, loops:0, rows:0 | - // +--------------------------------+----------+------+----------------------------------------------------------------------------------+----------------------------------+ - // 10 rows in set (0.00 sec) + //+----------------------------+-------+------+-------------------------------------------------------------------------------------------------+----------------------------------+ + //| id | count | task | operator info | execution info | + //+----------------------------+-------+------+-------------------------------------------------------------------------------------------------+----------------------------------+ + //| Projection_7 | 0.12 | root | test.t1.id, test.t2.a | time:3.844593ms, loops:1, rows:0 | + //| └─IndexJoin_11 | 0.12 | root | inner join, inner:IndexLookUp_10, outer key:test.t1.a, inner key:test.t2.d | time:3.830714ms, loops:1, rows:0 | + //| ├─Projection_21 | 0.10 | root | test.t1.id, test.t1.a, test.t1.b, cast(mod(test.t1.a, 30)) | time:3.735174ms, loops:1, rows:0 | + //| │ └─IndexLookUp_22 | 0.10 | root | | time:3.569946ms, loops:1, rows:0 | + //| │ ├─IndexScan_19 | 0.10 | cop | table:t1, index:d, b, c, range:[4 "t2",4 "t2"], 
keep order:false, stats:pseudo | time:50.542µs, loops:1, rows:0 | + //| │ └─TableScan_20 | 0.10 | cop | table:t1, keep order:false, stats:pseudo | time:0s, loops:0, rows:0 | + //| └─IndexLookUp_10 | 10.00 | root | | time:0ns, loops:0, rows:0 | + //| ├─IndexScan_8 | 10.00 | cop | table:t2, index:d, range: decided by [eq(test.t2.d, test.t1.a)], keep order:false, stats:pseudo | time:0ns, loops:0, rows:0 | + //| └─TableScan_9 | 10.00 | cop | table:t2, keep order:false, stats:pseudo | time:0ns, loops:0, rows:0 | + //+----------------------------+-------+------+-------------------------------------------------------------------------------------------------+----------------------------------+ + //9 rows in set (0.01 sec) // - c.Assert(rs.Rows(), HasLen, 10) - hasIndexLookUp12 := false + c.Assert(rs.Rows(), HasLen, 9) + hasIndexLookUp10 := false + hasIndexLookUp22 := false for _, row := range rs.Rows() { c.Assert(row, HasLen, 6) - if strings.HasSuffix(row[0].(string), "IndexLookUp_12") { - hasIndexLookUp12 = true + if strings.Contains(row[0].(string), "IndexLookUp_10") { + hasIndexLookUp10 = true c.Assert(row[4], Equals, "time:0ns, loops:0, rows:0") } + if strings.Contains(row[0].(string), "IndexLookUp_22") { + hasIndexLookUp22 = true + } } - c.Assert(hasIndexLookUp12, IsTrue) + c.Assert(hasIndexLookUp10, IsTrue) + c.Assert(hasIndexLookUp22, IsTrue) +} + +func (s *testAnalyzeSuite) TestVirtualGeneratedColumn(c *C) { + defer testleak.AfterTest(c)() + store, dom, err := newStoreWithBootstrap() + c.Assert(err, IsNil) + tk := testkit.NewTestKit(c, store) + defer func() { + dom.Close() + store.Close() + }() + tk.MustExec("use test") + tk.MustExec("drop table if exists t1") + tk.MustExec("create table t1(a int, b int as (a+1) virtual, c int as (b+1) virtual, d int as (c+1) virtual, key(b), index idx(c, d))") + tk.MustExec("insert into t1 (a) values (0)") + + tk.MustIndexRead("select b from t1 where b=1").Check(testkit.Rows("1")) + tk.MustIndexLookup("select b, c, d from t1 
where b=1").Check(testkit.Rows("1 2 3")) + tk.MustIndexLookup("select * from t1 where b=1").Check(testkit.Rows("0 1 2 3")) + tk.MustIndexRead("select c from t1 where c=2 and d=3").Check(testkit.Rows("2")) + + tk.MustExec("insert into t1 (a) values (1)") + tk.MustQuery("select /*+ TIDB_INLJ(o, i) */ i.b, o.a from t1 o, t1 i where i.b = o.a").Check(testkit.Rows("1 1")) + tk.MustQuery("explain select /*+ TIDB_INLJ(o, i) */ * from t1 o, t1 i where i.b = o.a").Check(testkit.Rows( + "IndexJoin_12 12487.50 root inner join, inner:Projection_10, outer key:test.o.a, inner key:test.i.b", + "├─Projection_15 9990.00 root test.o.a, cast(plus(test.o.a, 1)), cast(plus(cast(plus(test.o.a, 1)), 1)), cast(plus(cast(plus(cast(plus(test.o.a, 1)), 1)), 1))", + "│ └─TableReader_16 9990.00 root data:Selection_14", + "│ └─Selection_14 9990.00 cop not(isnull(test.o.a))", + "│ └─TableScan_13 10000.00 cop table:o, range:[-inf,+inf], keep order:false, stats:pseudo", + "└─Projection_10 9.99 root test.i.a, cast(plus(test.i.a, 1)), cast(plus(cast(plus(test.i.a, 1)), 1)), cast(plus(cast(plus(cast(plus(test.i.a, 1)), 1)), 1))", + " └─IndexLookUp_11 9.99 root ", + " ├─Selection_9 9.99 cop not(isnull(test.i.b))", + " │ └─IndexScan_7 10.00 cop table:i, index:b, range: decided by [eq(test.i.b, test.o.a)], keep order:false, stats:pseudo", + " └─TableScan_8 9.99 cop table:t1, keep order:false, stats:pseudo")) + + tk.MustExec(`CREATE TABLE person ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + address_info JSON, + city_no INT AS (JSON_EXTRACT(address_info, '$.city_no')) VIRTUAL, + KEY(city_no))`) + tk.MustExec(`INSERT INTO person (name, address_info) VALUES ("John", CAST('{"city_no": 1}' AS JSON))`) + tk.MustIndexLookup(`SELECT name FROM person where city_no=1`).Check(testkit.Rows("John")) } func (s *testAnalyzeSuite) TestLimitCrossEstimation(c *C) { diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 
5ff411d6e5e7f..d9cfc3cb72e8e 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -435,7 +435,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou keyOff2IdxOff[i] = 0 } if pkMatched { - innerPlan := p.constructInnerTableScan(ds, pkCol, outerJoinKeys, us) + innerPlan, err := p.constructInnerTableScan(ds, pkCol, outerJoinKeys, us) + if err != nil { + logutil.BgLogger().Error("construct inner table scan error", zap.Error(err)) + return nil + } // Since the primary key means one value corresponding to exact one row, this will always be a no worse one // comparing to other index. return p.constructIndexJoin(prop, outerIdx, innerPlan, nil, keyOff2IdxOff, nil, nil) @@ -464,7 +468,11 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou } idxCols, lens := expression.IndexInfo2Cols(ds.schema.Columns, helper.chosenIndexInfo) rangeInfo := helper.buildRangeDecidedByInformation(idxCols, outerJoinKeys) - innerPlan := p.constructInnerIndexScan(ds, helper.chosenIndexInfo, helper.chosenRemained, outerJoinKeys, us, rangeInfo) + innerPlan, err := p.constructInnerIndexScan(ds, helper.chosenIndexInfo, helper.chosenRemained, outerJoinKeys, us, rangeInfo) + if err != nil { + logutil.BgLogger().Error("construct inner index scan error", zap.Error(err)) + return nil + } return p.constructIndexJoin(prop, outerIdx, innerPlan, helper.chosenRanges, keyOff2IdxOff, lens, helper.lastColManager) } return nil @@ -514,7 +522,7 @@ func (ijHelper *indexJoinBuildHelper) buildRangeDecidedByInformation(idxCols []* } // constructInnerTableScan is specially used to construct the inner plan for PhysicalIndexJoin. 
-func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Column, outerJoinKeys []*expression.Column, us *LogicalUnionScan) PhysicalPlan { +func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Column, outerJoinKeys []*expression.Column, us *LogicalUnionScan) (PhysicalPlan, error) { ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pk.RetType.Flag)) ts := PhysicalTableScan{ Table: ds.tableInfo, @@ -533,15 +541,20 @@ func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Col ts.stats.StatsVersion = statistics.PseudoVersion } - copTask := &copTask{ + cop := &copTask{ tablePlan: ts, indexPlanFinished: true, } selStats := ts.stats.Scale(selectionFactor) - ts.addPushedDownSelection(copTask, selStats) - t := finishCopTask(ds.ctx, copTask) + t, err := ds.pushDownSelAndResolveVirtualCols(cop, nil, selStats) + if err != nil { + return nil, err + } + if cop, ok := t.(*copTask); ok { + t = finishCopTask(ds.ctx, cop) + } reader := t.plan() - return p.constructInnerUnionScan(us, reader) + return p.constructInnerUnionScan(us, reader), nil } func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader PhysicalPlan) PhysicalPlan { @@ -557,7 +570,7 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi // constructInnerIndexScan is specially used to construct the inner plan for PhysicalIndexJoin. 
func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexInfo, filterConds []expression.Expression, - outerJoinKeys []*expression.Column, us *LogicalUnionScan, rangeInfo string) PhysicalPlan { + outerJoinKeys []*expression.Column, us *LogicalUnionScan, rangeInfo string) (PhysicalPlan, error) { is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, @@ -611,10 +624,15 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn } selectivity := ds.stats.RowCount / ds.tableStats.RowCount finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount) - is.addPushedDownSelection(cop, ds, path, finalStats) - t := finishCopTask(ds.ctx, cop) + t, err := ds.pushDownSelAndResolveVirtualCols(cop, path, finalStats) + if err != nil { + return nil, err + } + if cop, ok := t.(*copTask); ok { + t = finishCopTask(ds.ctx, cop) + } reader := t.plan() - return p.constructInnerUnionScan(us, reader) + return p.constructInnerUnionScan(us, reader), nil } var symmetricOp = map[string]string{ diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 40e20ea43287d..dea9ddd30f64a 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -16,6 +16,7 @@ package core import ( "math" + "github.com/pingcap/errors" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" @@ -517,7 +518,6 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid } cop.cst = rowCount * scanFactor - task = cop if candidate.isMatchProp { if prop.Items[0].Desc { is.Desc = true @@ -531,11 +531,17 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid is.KeepOrder = true } // prop.IsEmpty() would always return true when coming to here, - // so we can just use prop.ExpectedCnt as parameter of addPushedDownSelection. + // so we can just use prop.ExpectedCnt to scale the stats passed to pushDownSelAndResolveVirtualCols. 
finalStats := ds.stats.ScaleByExpectCnt(prop.ExpectedCnt) - is.addPushedDownSelection(cop, ds, path, finalStats) + + task, err = ds.pushDownSelAndResolveVirtualCols(cop, path, finalStats) + if err != nil { + return invalidTask, err + } if prop.TaskTp == property.RootTaskType { - task = finishCopTask(ds.ctx, task) + if cop, ok := task.(*copTask); ok { + task = finishCopTask(ds.ctx, cop) + } } else if _, ok := task.(*rootTask); ok { return invalidTask, nil } @@ -574,8 +580,12 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, path *accessPath, finalStats *property.StatsInfo) { +func (ds *DataSource) addIndexScanSelection(copTask *copTask, path *accessPath) error { // Add filter condition to table plan now. + is, ok := copTask.indexPlan.(*PhysicalIndexScan) + if !ok { + return errors.Errorf("type assertion fail, expect PhysicalIndexScan, but got %T", copTask.indexPlan) + } indexConds, tableConds := path.indexFilters, path.tableFilters if indexConds != nil { copTask.cst += copTask.count() * cpuFactor @@ -591,11 +601,13 @@ func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSou } if tableConds != nil { copTask.finishIndexPlan() - copTask.cst += copTask.count() * cpuFactor - tableSel := PhysicalSelection{Conditions: tableConds}.Init(is.ctx, finalStats) - tableSel.SetChildren(copTask.tablePlan) - copTask.tablePlan = tableSel + ts, ok := copTask.tablePlan.(*PhysicalTableScan) + if !ok { + return errors.Errorf("type assertion fail, expect PhysicalTableScan, but got %T", copTask.tablePlan) + } + ts.filterCondition = append(ts.filterCondition, tableConds...) 
} + return nil } func matchIndicesProp(idxCols []*model.IndexColumn, propItems []property.Item) bool { @@ -802,11 +814,10 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, candid ts.Ranges = path.ranges ts.AccessCondition, ts.filterCondition = path.accessConds, path.tableFilters rowCount := path.countAfterAccess - copTask := &copTask{ + cop := &copTask{ tablePlan: ts, indexPlanFinished: true, } - task = copTask // Adjust number of rows we actually need to scan if prop.ExpectedCnt is smaller than the count we calculated. if prop.ExpectedCnt < ds.stats.RowCount { count, ok, corr := ds.crossEstimateRowCount(path, prop.ExpectedCnt, candidate.isMatchProp && prop.Items[0].Desc) @@ -831,25 +842,132 @@ func (ds *DataSource) convertToTableScan(prop *property.PhysicalProperty, candid ts.stats.StatsVersion = statistics.PseudoVersion } - copTask.cst = rowCount * scanFactor + cop.cst = rowCount * scanFactor if candidate.isMatchProp { if prop.Items[0].Desc { ts.Desc = true - copTask.cst = rowCount * descScanFactor + cop.cst = rowCount * descScanFactor } ts.KeepOrder = true - copTask.keepOrder = true + cop.keepOrder = true + } + + task, err = ds.pushDownSelAndResolveVirtualCols(cop, path, ds.stats.ScaleByExpectCnt(prop.ExpectedCnt)) + if err != nil { + return invalidTask, err } - ts.addPushedDownSelection(copTask, ds.stats.ScaleByExpectCnt(prop.ExpectedCnt)) if prop.TaskTp == property.RootTaskType { - task = finishCopTask(ds.ctx, task) + if cop, ok := task.(*copTask); ok { + task = finishCopTask(ds.ctx, cop) + } } else if _, ok := task.(*rootTask); ok { return invalidTask, nil } return task, nil } -func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *property.StatsInfo) { +// pushDownSelAndResolveVirtualCols push some filters down to coprocessor and resolve virtual columns. +// 1. push down filters and check if there is virtual column. +// 2. substitute virtual columns in TableScan's Schema to corresponding physical columns. 
+// 3. substitute virtual columns in TableScan's filters and push them down. +// 4. add a Projection upon this DataSource. +func (ds *DataSource) pushDownSelAndResolveVirtualCols(copTask *copTask, path *accessPath, stats *property.StatsInfo) (t task, err error) { + // step 1 + t = copTask + if copTask.indexPlan != nil { + if err := ds.addIndexScanSelection(copTask, path); err != nil { + return invalidTask, err + } + } + if copTask.tablePlan == nil { // don't need to handle virtual columns in IndexScan + return + } + ts, ok := copTask.tablePlan.(*PhysicalTableScan) + if !ok { + return invalidTask, errors.Errorf("type assertion fail, expect PhysicalTableScan, but got %T", copTask.tablePlan) + } + if len(ds.virtualColExprs) == 0 { + ds.addTableScanSelection(copTask, ts, stats) + return + } + + // step 2 + ds.substituteVirtualColumns(ts) + + // step 3 + for i, expr := range ts.filterCondition { + ts.filterCondition[i] = expression.ColumnSubstitute(expr, ds.virtualColSchema, ds.virtualColExprs) + } + var cantBePushed []expression.Expression + _, ts.filterCondition, cantBePushed = expression.ExpressionsToPB(ds.ctx.GetSessionVars().StmtCtx, ts.filterCondition, ds.ctx.GetClient()) + ds.addTableScanSelection(copTask, ts, stats) + if len(cantBePushed) > 0 { // for filters cannot be pushed down, we add a root Selection to handle them + sel := PhysicalSelection{Conditions: cantBePushed}.Init(ds.ctx, stats) + t = sel.attach2Task(copTask) + } + + // step 4 + projExprs := make([]expression.Expression, 0, len(ds.Schema().Columns)) + for _, c := range ds.Schema().Columns { + projExprs = append(projExprs, expression.ColumnSubstitute(c, ds.virtualColSchema, ds.virtualColExprs)) + } + proj := PhysicalProjection{Exprs: projExprs}.Init(ds.ctx, stats) + proj.SetSchema(ds.Schema()) + t = proj.attach2Task(t) + return +} + +// substituteVirtualColumns substitute virtual columns in TableScan's Schema to physical columns. 
+func (ds *DataSource) substituteVirtualColumns(ts *PhysicalTableScan) { + // clone a new Schema to modify for safety + schema := ts.Schema().Clone() + colInfos := make([]*model.ColumnInfo, 0, len(ts.Columns)) + colInfos = append(colInfos, ts.Columns...) + + // derive physical columns from virtual columns + phyCols := make(map[int64]*expression.Column) // physical columns this ts already has + virCols := make(map[int64]*expression.Column) // virtual columns this ts has + phyColsFromVir := make(map[int64]*expression.Column) // physical columns derived from virtual columns + for i := 0; i < len(schema.Columns) && i < len(colInfos); i++ { + if colInfos[i].IsGenerated() && !colInfos[i].GeneratedStored { + virCols[schema.Columns[i].UniqueID] = schema.Columns[i] + expr := expression.ColumnSubstitute(schema.Columns[i], ds.virtualColSchema, ds.virtualColExprs) + for _, phyColFromVir := range expression.ExtractColumns(expr) { + phyColsFromVir[phyColFromVir.UniqueID] = phyColFromVir + } + } else { + phyCols[schema.Columns[i].UniqueID] = schema.Columns[i] + } + } + + // remove virtual columns (walking backwards, so an in-place delete never skips the element that shifts into slot i) and add new derived physical columns + for i := len(schema.Columns) - 1; i >= 0; i-- { + if _, ok := virCols[schema.Columns[i].UniqueID]; ok { + schema.Columns = append(schema.Columns[:i], schema.Columns[i+1:]...) + colInfos = append(colInfos[:i], colInfos[i+1:]...) + } + } + for id, col := range phyColsFromVir { + if _, ok := phyCols[id]; !ok { + schema.Columns = append(schema.Columns, col) + var colInfo *model.ColumnInfo + for _, ci := range ds.tableInfo.Cols() { + if ci.Name.O == col.ColName.O { + colInfo = ci + break + } + } + colInfos = append(colInfos, colInfo) + phyCols[id] = col + } + } + + // update TableScan's Schema + ts.Columns = colInfos + ts.SetSchema(schema) +} + +func (ds *DataSource) addTableScanSelection(copTask *copTask, ts *PhysicalTableScan, stats *property.StatsInfo) { // Add filter condition to table plan now. 
if len(ts.filterCondition) > 0 { copTask.cst += copTask.count() * cpuFactor diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index bc2087d1190b3..418756b82b4fb 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -43,7 +43,7 @@ import ( "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/types" - driver "github.com/pingcap/tidb/types/parser_driver" + "github.com/pingcap/tidb/types/parser_driver" "github.com/pingcap/tidb/util/chunk" ) @@ -2311,17 +2311,9 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName) (L result = us } - // If this table contains any virtual generated columns, we need a - // "Projection" to calculate these columns. - proj, err := b.projectVirtualColumns(ctx, ds, columns) - if err != nil { + if err := b.prepareVirtualColumns(ctx, ds, columns); err != nil { return nil, err } - - if proj != nil { - proj.SetChildren(result) - result = proj - } return result, nil } @@ -2382,25 +2374,23 @@ func (b *PlanBuilder) BuildDataSourceFromView(ctx context.Context, dbName model. return projUponView, nil } -// projectVirtualColumns is only for DataSource. If some table has virtual generated columns, -// we add a projection on the original DataSource, and calculate those columns in the projection -// so that plans above it can reference generated columns by their name. -func (b *PlanBuilder) projectVirtualColumns(ctx context.Context, ds *DataSource, columns []*table.Column) (*LogicalProjection, error) { - hasVirtualGeneratedColumn := false +// prepareVirtualColumns is only for DataSource. +// It prepares virtualColExprs and virtualColSchema for table which has virtual generated columns. +// virtualColExprs and virtualColSchema are used to rewrite virtual columns in pushDownSelAndResolveVirtualCols. 
+func (b *PlanBuilder) prepareVirtualColumns(ctx context.Context, ds *DataSource, columns []*table.Column) error { + hasVirtualCol := false for _, column := range columns { if column.IsGenerated() && !column.GeneratedStored { - hasVirtualGeneratedColumn = true + hasVirtualCol = true break } } - if !hasVirtualGeneratedColumn { - return nil, nil + if !hasVirtualCol { + return nil } - proj := LogicalProjection{ - Exprs: make([]expression.Expression, 0, len(columns)), - calculateGenCols: true, - }.Init(b.ctx) + ds.virtualColSchema = ds.Schema().Clone() + ds.virtualColExprs = make([]expression.Expression, 0, len(columns)) for i, colExpr := range ds.Schema().Columns { var exprIsGen = false var expr expression.Expression @@ -2409,7 +2399,7 @@ func (b *PlanBuilder) projectVirtualColumns(ctx context.Context, ds *DataSource, var err error expr, _, err = b.rewrite(ctx, columns[i].GeneratedExpr, ds, nil, true) if err != nil { - return nil, err + return err } // Because the expression might return different type from // the generated column, we should wrap a CAST on the result. @@ -2420,7 +2410,7 @@ func (b *PlanBuilder) projectVirtualColumns(ctx context.Context, ds *DataSource, if !exprIsGen { expr = colExpr } - proj.Exprs = append(proj.Exprs, expr) + ds.virtualColExprs = append(ds.virtualColExprs, expr) } // Re-iterate expressions to handle those virtual generated columns that refers to the other generated columns, for @@ -2430,12 +2420,10 @@ func (b *PlanBuilder) projectVirtualColumns(ctx context.Context, ds *DataSource, // column a, column b as (a * 2), column c as ((a * 2) + 1) // A generated column definition can refer to only generated columns occurring earlier in the table definition, so // it's safe to iterate in index-ascending order. 
- for i, expr := range proj.Exprs { - proj.Exprs[i] = expression.ColumnSubstitute(expr, ds.Schema(), proj.Exprs) + for i, expr := range ds.virtualColExprs { + ds.virtualColExprs[i] = expression.ColumnSubstitute(expr, ds.Schema(), ds.virtualColExprs) } - - proj.SetSchema(ds.Schema().Clone()) - return proj, nil + return nil } // buildApplyWithJoinType builds apply plan with outerPlan and innerPlan, which apply join with particular join type for diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 25094e5d0028b..79557c1cc15be 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -353,6 +353,10 @@ type DataSource struct { // handleCol represents the handle column for the datasource, either the // int primary key column or extra handle column. handleCol *expression.Column + + // fields for virtual generated columns + virtualColSchema *expression.Schema + virtualColExprs []expression.Expression } // accessPath indicates the way we access a table: by using single index, or by using multiple indexes, diff --git a/util/testkit/testkit.go b/util/testkit/testkit.go index 947afe70961de..04af0919b7009 100644 --- a/util/testkit/testkit.go +++ b/util/testkit/testkit.go @@ -185,12 +185,17 @@ func (tk *TestKit) MustExec(sql string, args ...interface{}) { } } -// MustIndexLookup checks whether the plan for the sql is Point_Get. +// MustIndexLookup checks whether the plan for the sql is IndexLookUp. func (tk *TestKit) MustIndexLookup(sql string, args ...interface{}) *Result { + return tk.MustPlan("IndexLookUp", sql, args...) +} + +// MustPlan checks whether the plan for the sql is the specific plan. +func (tk *TestKit) MustPlan(plan, sql string, args ...interface{}) *Result { rs := tk.MustQuery("explain "+sql, args...) 
hasIndexLookup := false for i := range rs.rows { - if strings.Contains(rs.rows[i][0], "IndexLookUp") { + if strings.Contains(rs.rows[i][0], plan) { hasIndexLookup = true break } @@ -199,12 +204,14 @@ func (tk *TestKit) MustIndexLookup(sql string, args ...interface{}) *Result { return tk.MustQuery(sql, args...) } +// MustIndexRead checks whether the plan for this sql is IndexReader. +func (tk *TestKit) MustIndexRead(sql string, args ...interface{}) *Result { + return tk.MustPlan("IndexReader", sql, args...) +} + // MustPointGet checks whether the plan for the sql is Point_Get. func (tk *TestKit) MustPointGet(sql string, args ...interface{}) *Result { - rs := tk.MustQuery("explain "+sql, args...) - tk.c.Assert(len(rs.rows), check.Equals, 1) - tk.c.Assert(strings.Contains(rs.rows[0][0], "Point_Get"), check.IsTrue) - return tk.MustQuery(sql, args...) + return tk.MustPlan("Point_Get", sql, args...) } // MustQuery query the statements and returns result rows.