planner: check clustered index don't need double read #18054

Merged

Changes from 3 commits
4 changes: 2 additions & 2 deletions planner/core/exhaust_physical_plans.go
@@ -847,7 +847,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
tblCols: ds.TblCols,
keepOrder: is.KeepOrder,
}
if !isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table.PKIsHandle) {
if !ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table) {
// In this case, a double read is needed.
ts := PhysicalTableScan{
Columns: ds.Columns,
@@ -864,7 +864,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask(
cop.tablePlan = ts
}
is.initSchema(path.Index, path.FullIdxCols, cop.tablePlan != nil)
indexConds, tblConds := splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
indexConds, tblConds := ds.splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
// Specially handle cases when input rowCount is 0, which can only happen in 2 scenarios:
// - estimated row count of outer plan is 0;
// - estimated row count of inner "DataSource + filters" is 0;
101 changes: 50 additions & 51 deletions planner/core/find_best_task.go
@@ -25,7 +25,6 @@ import (
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/table/tables"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/logutil"
@@ -407,7 +406,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida
if path.IsTablePath() {
currentCandidate = ds.getTableCandidate(path, prop)
} else {
coveredByIdx := isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo.PKIsHandle)
coveredByIdx := ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || coveredByIdx {
// We will use index to generate physical plan if any of the following conditions is satisfied:
// 1. This path's access cond is not nil.
@@ -700,17 +699,19 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty,
return ts, partialCost
}

func isCoveringIndex(columns, indexColumns []*expression.Column, idxColLens []int, pkIsHandle bool) bool {
func (ds *DataSource) isCoveringIndex(columns, indexColumns []*expression.Column, idxColLens []int, tblInfo *model.TableInfo) bool {
indexCols := append(indexColumns, ds.commonHandleCols...)
indexColLens := append(idxColLens, ds.commonHandleLens...)
for _, col := range columns {
if pkIsHandle && mysql.HasPriKeyFlag(col.RetType.Flag) {
if tblInfo.PKIsHandle && mysql.HasPriKeyFlag(col.RetType.Flag) {
continue
}
if col.ID == model.ExtraHandleID {
continue
}
isIndexColumn := false
for i, indexCol := range indexColumns {
isFullLen := idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.Flen
for i, indexCol := range indexCols {
@coocood (Member) commented on Jun 16, 2020:

We can extract this for loop, and call it twice with indexColumns and commonHandleCols. So we don't need to append slices.
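A minimal sketch of that extraction, assuming it sits next to isCoveringIndex in planner/core (the helper name coverColumn is hypothetical):

	// coverColumn reports whether col is fully covered by one slice of
	// index columns, mirroring the inner loop of isCoveringIndex.
	func coverColumn(col *expression.Column, indexCols []*expression.Column, idxColLens []int) bool {
		for i, indexCol := range indexCols {
			// A column is covered only when it matches the index column at full length.
			isFullLen := idxColLens[i] == types.UnspecifiedLength || idxColLens[i] == col.RetType.Flen
			if indexCol != nil && col.Equal(nil, indexCol) && isFullLen {
				return true
			}
		}
		return false
	}

isCoveringIndex could then call coverColumn(col, indexColumns, idxColLens) and, if that fails, coverColumn(col, ds.commonHandleCols, ds.commonHandleLens), avoiding the two appends at the top of the function.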

isFullLen := indexColLens[i] == types.UnspecifiedLength || indexColLens[i] == col.RetType.Flen
// We use col.OrigColName instead of col.ColName.
// Related issue: https://github.com/pingcap/tidb/issues/9636.
if indexCol != nil && col.Equal(nil, indexCol) && isFullLen {
@@ -774,7 +775,7 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid
cop.cst = cost
task = cop
if candidate.isMatchProp {
if cop.tablePlan != nil {
if cop.tablePlan != nil && !ds.tableInfo.IsCommonHandle {
col, isNew := cop.tablePlan.(*PhysicalTableScan).appendExtraHandleCol(ds)
cop.extraHandleCol = col
cop.doubleReadNeedProj = isNew
@@ -814,52 +815,50 @@ func (is *PhysicalIndexScan) indexScanRowSize(idx *model.IndexInfo, ds *DataSour
func (is *PhysicalIndexScan) initSchema(idx *model.IndexInfo, idxExprCols []*expression.Column, isDoubleRead bool) {
indexCols := make([]*expression.Column, len(is.IdxCols), len(idx.Columns)+1)
copy(indexCols, is.IdxCols)
for i := len(is.IdxCols); i < len(idx.Columns); i++ {
if idxExprCols[i] != nil {
indexCols = append(indexCols, idxExprCols[i])
} else {
// TODO: try to reuse the col generated when building the DataSource.
indexCols = append(indexCols, &expression.Column{
ID: is.Table.Columns[idx.Columns[i].Offset].ID,
RetType: &is.Table.Columns[idx.Columns[i].Offset].FieldType,
UniqueID: is.ctx.GetSessionVars().AllocPlanColumnID(),
})
}
}
setHandle := len(indexCols) > len(idx.Columns)
if !setHandle {
for i, col := range is.Columns {
if (mysql.HasPriKeyFlag(col.Flag) && is.Table.PKIsHandle) || col.ID == model.ExtraHandleID {
indexCols = append(indexCols, is.dataSourceSchema.Columns[i])
setHandle = true
break
is.NeedCommonHandle = is.Table.IsCommonHandle

if !is.NeedCommonHandle {
for i := len(is.IdxCols); i < len(idx.Columns); i++ {
if idxExprCols[i] != nil {
indexCols = append(indexCols, idxExprCols[i])
} else {
// TODO: try to reuse the col generated when building the DataSource.
indexCols = append(indexCols, &expression.Column{
ID: is.Table.Columns[idx.Columns[i].Offset].ID,
RetType: &is.Table.Columns[idx.Columns[i].Offset].FieldType,
UniqueID: is.ctx.GetSessionVars().AllocPlanColumnID(),
})
}
}
}

if is.Table.IsCommonHandle {
pkIdx := tables.FindPrimaryIndex(is.Table)
for _, col := range pkIdx.Columns {
indexCols = append(indexCols, &expression.Column{
ID: is.Table.Columns[col.Offset].ID,
RetType: &is.Table.Columns[col.Offset].FieldType,
UniqueID: is.ctx.GetSessionVars().AllocPlanColumnID(),
})
setHandle := len(indexCols) > len(idx.Columns)
if !setHandle {
for i, col := range is.Columns {
if (mysql.HasPriKeyFlag(col.Flag) && is.Table.PKIsHandle) || col.ID == model.ExtraHandleID {
indexCols = append(indexCols, is.dataSourceSchema.Columns[i])
setHandle = true
break
}
}
}
is.NeedCommonHandle = true
}

// If it's double read case, the first index must return handle. So we should add extra handle column
// if there isn't a handle column.
if isDoubleRead && !setHandle {
if !is.Table.IsCommonHandle {
indexCols = append(indexCols, &expression.Column{
RetType: types.NewFieldType(mysql.TypeLonglong),
ID: model.ExtraHandleID,
UniqueID: is.ctx.GetSessionVars().AllocPlanColumnID(),
})
// If it's double read case, the first index must return handle. So we should add extra handle column
// if there isn't a handle column.
if isDoubleRead && !setHandle {
if !is.Table.IsCommonHandle {
indexCols = append(indexCols, &expression.Column{
RetType: types.NewFieldType(mysql.TypeLonglong),
ID: model.ExtraHandleID,
UniqueID: is.ctx.GetSessionVars().AllocPlanColumnID(),
})
}
}
} else {
if len(is.IdxCols) < len(is.Columns) {
for i := len(is.IdxCols); i < len(idxExprCols); i++ {
indexCols = append(indexCols, idxExprCols[i])
A Member commented:

Need to check if idxExprCols[i] != nil?

The Contributor (Author) replied:

idxExprCols for the common handle are never nil here.

}
}
}

is.SetSchema(expression.NewSchema(indexCols...))
}

@@ -922,11 +921,11 @@ func matchIndicesProp(idxCols []*expression.Column, colLens []int, propItems []p
return true
}

func splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, idxColLens []int,
func (ds *DataSource) splitIndexFilterConditions(conditions []expression.Expression, indexColumns []*expression.Column, idxColLens []int,
table *model.TableInfo) (indexConds, tableConds []expression.Expression) {
var indexConditions, tableConditions []expression.Expression
for _, cond := range conditions {
if isCoveringIndex(expression.ExtractColumns(cond), indexColumns, idxColLens, table.PKIsHandle) {
if ds.isCoveringIndex(expression.ExtractColumns(cond), indexColumns, idxColLens, table) {
indexConditions = append(indexConditions, cond)
} else {
tableConditions = append(tableConditions, cond)
@@ -1408,7 +1407,7 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper
is.Hist = &statsTbl.Indices[idx.ID].Histogram
}
rowCount := path.CountAfterAccess
is.initSchema(idx, path.FullIdxCols, !isSingleScan)
is.initSchema(idx, append(path.FullIdxCols, ds.commonHandleCols...), !isSingleScan)
// Only use expectedCnt when it's smaller than the count we calculated.
// e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate
// If expectedCnt and count2 are both zero and we go into the below `if` block, count1 will be set to zero though it shouldn't be.
9 changes: 9 additions & 0 deletions planner/core/logical_plan_builder.go
@@ -2859,6 +2859,15 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName, as
ds.names = names
ds.setPreferredStoreType(b.TableHints())

// Init commonHandleCols and commonHandleLens for data source.
if tableInfo.IsCommonHandle {
for _, idx := range tableInfo.Indices {
if idx.Primary {
A Member commented:

There is a function, tables.FindPrimaryIndex, that can be used here.
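A minimal sketch of that suggestion; tables.FindPrimaryIndex is already called this way elsewhere in this PR, though the "github.com/pingcap/tidb/table/tables" import would be needed in this file:

	if tableInfo.IsCommonHandle {
		// FindPrimaryIndex replaces the manual scan over tableInfo.Indices.
		pkIdx := tables.FindPrimaryIndex(tableInfo)
		ds.commonHandleCols, ds.commonHandleLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, pkIdx)
	}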

ds.commonHandleCols, ds.commonHandleLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, idx)
break
}
}
}
// Init FullIdxCols, FullIdxColLens for accessPaths.
for _, path := range ds.possibleAccessPaths {
if !path.IsTablePath() {
7 changes: 5 additions & 2 deletions planner/core/logical_plans.go
@@ -498,6 +498,9 @@ type DataSource struct {
// TblCols contains the original columns of table before being pruned, and it
// is used for estimating table scan cost.
TblCols []*expression.Column
// commonHandleCols and commonHandleLens save the columns and lengths of the primary key when it is the clustered index.
commonHandleCols []*expression.Column
commonHandleLens []int
// TblColHists contains the Histogram of all original table columns,
// it is converted from statisticTable, and used for IO/network cost estimating.
TblColHists *statistics.HistColl
@@ -626,7 +629,7 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) {
path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index)
path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index)
// If the index columns can cover all of the needed columns, we can use an IndexGather + IndexScan.
if isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo.PKIsHandle) {
if ds.isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) {
gathers = append(gathers, ds.buildIndexGather(path))
}
// TODO: If index columns can not cover the schema, use IndexLookUpGather.
@@ -857,7 +860,7 @@ func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expres
}
}
}
path.IndexFilters, path.TableFilters = splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
path.IndexFilters, path.TableFilters = ds.splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo)
// If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info.
// We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity.
if path.CountAfterAccess < ds.stats.RowCount && !isIm {
4 changes: 3 additions & 1 deletion planner/core/testdata/integration_suite_in.json
@@ -143,7 +143,9 @@
"select * from t1",
"select * from t1 where t1.a >= 1 and t1.a < 4",
"select * from t1 where t1.a = 1 and t1.b < \"333\"",
"select * from t1 where t1.c = 3.3"
"select * from t1 where t1.c = 3.3",
"select t1.b, t1.c from t1 where t1.c = 2.2",
"select /*+ use_index(t1, c) */ * from t1"
]
}
]
27 changes: 24 additions & 3 deletions planner/core/testdata/integration_suite_out.json
@@ -682,13 +682,34 @@
{
"SQL": "select * from t1 where t1.c = 3.3",
"Plan": [
"TableReader_7 1.00 root data:Selection_6",
"└─Selection_6 1.00 cop[tikv] eq(test.t1.c, 3.3)",
" └─TableFullScan_5 3.00 cop[tikv] table:t1 keep order:false"
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:t1, index:c(c) range:[3.3000000000,3.3000000000], keep order:false"
],
"Res": [
"3 333 3.3000000000"
]
},
{
"SQL": "select t1.b, t1.c from t1 where t1.c = 2.2",
"Plan": [
"IndexReader_6 1.00 root index:IndexRangeScan_5",
"└─IndexRangeScan_5 1.00 cop[tikv] table:t1, index:c(c) range:[2.2000000000,2.2000000000], keep order:false"
],
"Res": [
"222 2.2000000000"
]
},
{
"SQL": "select /*+ use_index(t1, c) */ * from t1",
"Plan": [
"IndexReader_5 3.00 root index:IndexFullScan_4",
"└─IndexFullScan_4 3.00 cop[tikv] table:t1, index:c(c) keep order:false"
],
"Res": [
"1 111 1.1000000000",
"2 222 2.2000000000",
"3 333 3.3000000000"
]
}
]
}
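These new expected results illustrate the point of the covering check above: with a clustered (common-handle) primary key, every entry of a secondary index already carries the primary-key columns, so index c(c) can cover all of t1's columns and the planner produces an IndexReader instead of a double read. The DDL for t1 is not shown in this diff; a testkit-style sketch of a setup consistent with the expected output (the column types, the (a, b) clustered primary key, and the session-variable name are all assumptions):

	// assumed test setup; tk is a *testkit.TestKit bound to a session
	tk.MustExec("set @@tidb_enable_clustered_index = 1") // variable name is an assumption
	tk.MustExec("create table t1 (a int, b char(10), c decimal(20,10), primary key (a, b), key c(c))")
	tk.MustExec("insert into t1 values (1, '111', 1.1), (2, '222', 2.2), (3, '333', 3.3)")

Under such a schema, select * from t1 where t1.c = 3.3 reads only index c(c), since the handle columns a and b come for free with each index entry.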