Skip to content

Commit

Permalink
planner: update the scan-row-size calculation formula in model2 (#38968)
Browse files Browse the repository at this point in the history
ref #35240
  • Loading branch information
qw4990 authored Nov 9, 2022
1 parent 10115e0 commit ce62915
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 172 deletions.
4 changes: 2 additions & 2 deletions planner/core/plan_cost_ver1.go
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ func (p *PhysicalHashJoin) GetCost(lCnt, rCnt float64, isMPP bool, costFlag uint
sessVars := p.ctx.GetSessionVars()
oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load()
memQuota := sessVars.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint
rowSize := getAvgRowSize(build.statsInfo(), build.Schema())
rowSize := getAvgRowSize(build.statsInfo(), build.Schema().Columns)
spill := oomUseTmpStorage && memQuota > 0 && rowSize*buildCnt > float64(memQuota) && p.storeTp != kv.TiFlash
// Cost of building hash table.
cpuFactor := sessVars.GetCPUFactor()
Expand Down Expand Up @@ -1049,7 +1049,7 @@ func (p *PhysicalSort) GetCost(count float64, schema *expression.Schema) float64

oomUseTmpStorage := variable.EnableTmpStorageOnOOM.Load()
memQuota := sessVars.MemTracker.GetBytesLimit() // sessVars.MemQuotaQuery && hint
rowSize := getAvgRowSize(p.statsInfo(), schema)
rowSize := getAvgRowSize(p.statsInfo(), schema.Columns)
spill := oomUseTmpStorage && memQuota > 0 && rowSize*count > float64(memQuota)
diskCost := count * sessVars.GetDiskFactor() * rowSize
if !spill {
Expand Down
32 changes: 19 additions & 13 deletions planner/core/plan_cost_ver2.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (p *PhysicalIndexScan) getPlanCostVer2(taskType property.TaskType, option *
}

rows := getCardinality(p, option.CostFlag)
rowSize := math.Max(p.getScanRowSize(), 2.0)
rowSize := math.Max(getAvgRowSize(p.stats, p.schema.Columns), 2.0) // consider all index columns
scanFactor := getTaskScanFactorVer2(p, taskType)

p.planCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
Expand All @@ -132,7 +132,13 @@ func (p *PhysicalTableScan) getPlanCostVer2(taskType property.TaskType, option *
}

rows := getCardinality(p, option.CostFlag)
rowSize := math.Max(p.getScanRowSize(), 2.0)
var rowSize float64
if p.StoreType == kv.TiKV {
rowSize = getAvgRowSize(p.stats, p.tblCols) // consider all columns if TiKV
} else { // TiFlash
rowSize = getAvgRowSize(p.stats, p.schema.Columns)
}
rowSize = math.Max(rowSize, 2.0)
scanFactor := getTaskScanFactorVer2(p, taskType)

p.planCostVer2 = scanCostVer2(option, rows, rowSize, scanFactor)
Expand All @@ -155,7 +161,7 @@ func (p *PhysicalIndexReader) getPlanCostVer2(taskType property.TaskType, option
}

rows := getCardinality(p.indexPlan, option.CostFlag)
rowSize := getAvgRowSize(p.indexPlan.Stats(), p.indexPlan.Schema())
rowSize := getAvgRowSize(p.indexPlan.Stats(), p.indexPlan.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())

Expand All @@ -180,7 +186,7 @@ func (p *PhysicalTableReader) getPlanCostVer2(taskType property.TaskType, option
}

rows := getCardinality(p.tablePlan, option.CostFlag)
rowSize := getAvgRowSize(p.tablePlan.Stats(), p.tablePlan.Schema())
rowSize := getAvgRowSize(p.tablePlan.Stats(), p.tablePlan.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().DistSQLScanConcurrency())
childType := property.CopSingleReadTaskType
Expand Down Expand Up @@ -284,7 +290,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
var tableSideCost costVer2
if tablePath := p.tablePlan; tablePath != nil {
rows := getCardinality(tablePath, option.CostFlag)
rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema())
rowSize := getAvgRowSize(tablePath.Stats(), tablePath.Schema().Columns)

tableNetCost := netCostVer2(option, rows, rowSize, netFactor)
tableChildCost, err := tablePath.getPlanCostVer2(taskType, option)
Expand All @@ -297,7 +303,7 @@ func (p *PhysicalIndexMergeReader) getPlanCostVer2(taskType property.TaskType, o
indexSideCost := make([]costVer2, 0, len(p.partialPlans))
for _, indexPath := range p.partialPlans {
rows := getCardinality(indexPath, option.CostFlag)
rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema())
rowSize := getAvgRowSize(indexPath.Stats(), indexPath.Schema().Columns)

indexNetCost := netCostVer2(option, rows, rowSize, netFactor)
indexChildCost, err := indexPath.getPlanCostVer2(taskType, option)
Expand Down Expand Up @@ -329,7 +335,7 @@ func (p *PhysicalSort) getPlanCostVer2(taskType property.TaskType, option *PlanC
}

rows := math.Max(getCardinality(p.children[0], option.CostFlag), 1)
rowSize := getAvgRowSize(p.statsInfo(), p.Schema())
rowSize := getAvgRowSize(p.statsInfo(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)
diskFactor := defaultVer2Factors.TiDBDisk
Expand Down Expand Up @@ -378,7 +384,7 @@ func (p *PhysicalTopN) getPlanCostVer2(taskType property.TaskType, option *PlanC

rows := getCardinality(p.children[0], option.CostFlag)
N := math.Max(1, float64(p.Count+p.Offset))
rowSize := getAvgRowSize(p.statsInfo(), p.Schema())
rowSize := getAvgRowSize(p.statsInfo(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)

Expand Down Expand Up @@ -429,7 +435,7 @@ func (p *PhysicalHashAgg) getPlanCostVer2(taskType property.TaskType, option *Pl

inputRows := getCardinality(p.children[0], option.CostFlag)
outputRows := getCardinality(p, option.CostFlag)
outputRowSize := getAvgRowSize(p.Stats(), p.Schema())
outputRowSize := getAvgRowSize(p.Stats(), p.Schema().Columns)
cpuFactor := getTaskCPUFactorVer2(p, taskType)
memFactor := getTaskMemFactorVer2(p, taskType)
concurrency := float64(p.ctx.GetSessionVars().HashAggFinalConcurrency())
Expand Down Expand Up @@ -501,7 +507,7 @@ func (p *PhysicalHashJoin) getPlanCostVer2(taskType property.TaskType, option *P
}
buildRows := getCardinality(build, option.CostFlag)
probeRows := getCardinality(probe, option.CostFlag)
buildRowSize := getAvgRowSize(build.Stats(), build.Schema())
buildRowSize := getAvgRowSize(build.Stats(), build.Schema().Columns)
tidbConcurrency := float64(p.Concurrency)
mppConcurrency := float64(3) // TODO: remove this empirical value
cpuFactor := getTaskCPUFactorVer2(p, taskType)
Expand Down Expand Up @@ -645,7 +651,7 @@ func (p *PhysicalExchangeReceiver) getPlanCostVer2(taskType property.TaskType, o
}

rows := getCardinality(p, option.CostFlag)
rowSize := getAvgRowSize(p.stats, p.Schema())
rowSize := getAvgRowSize(p.stats, p.Schema().Columns)
netFactor := getTaskNetFactorVer2(p, taskType)
isBCast := false
if sender, ok := p.children[0].(*PhysicalExchangeSender); ok {
Expand Down Expand Up @@ -678,7 +684,7 @@ func (p *PointGetPlan) getPlanCostVer2(taskType property.TaskType, option *PlanC
p.planCostInit = true
return zeroCostVer2, nil
}
rowSize := getAvgRowSize(p.stats, p.schema)
rowSize := getAvgRowSize(p.stats, p.schema.Columns)
netFactor := getTaskNetFactorVer2(p, taskType)

p.planCostVer2 = netCostVer2(option, 1, rowSize, netFactor)
Expand All @@ -698,7 +704,7 @@ func (p *BatchPointGetPlan) getPlanCostVer2(taskType property.TaskType, option *
return zeroCostVer2, nil
}
rows := getCardinality(p, option.CostFlag)
rowSize := getAvgRowSize(p.stats, p.schema)
rowSize := getAvgRowSize(p.stats, p.schema.Columns)
netFactor := getTaskNetFactorVer2(p, taskType)

p.planCostVer2 = netCostVer2(option, rows, rowSize, netFactor)
Expand Down
44 changes: 42 additions & 2 deletions planner/core/plan_cost_ver2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,55 @@ func TestCostModelShowFormula(t *testing.T) {
actual := make([][]interface{}, 0, len(plan))
for _, row := range plan {
actual = append(actual, []interface{}{row[0], row[3]}) // id,costFormula
fmt.Println(actual)
}
require.Equal(t, actual, [][]interface{}{
{"TableReader_7", "((Selection_6) + (net(2*rowsize(16)*tidb_kv_net_factor(3.96))))/15"},
{"└─Selection_6", "(cpu(3*filters(1)*tikv_cpu_factor(49.9))) + (TableFullScan_5)"},
{" └─TableFullScan_5", "scan(3*logrowsize(29)*tikv_scan_factor(40.7))"},
{" └─TableFullScan_5", "scan(3*logrowsize(32)*tikv_scan_factor(40.7))"},
})
}

// TestCostModelVer2ScanRowSize verifies the scan-row-size rule of cost model
// version 2: an index scan is always charged the average row size of the whole
// index (regardless of which indexed columns the query actually reads), and a
// table scan is always charged the average row size of all table columns.
// It also checks the optimizer consequently prefers the narrowest index that
// covers the query.
func TestCostModelVer2ScanRowSize(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec(`create table t (pk int, a int, b int, c int, d int, primary key(pk), index ab(a, b), index abc(a, b, c))`)
	tk.MustExec("insert into t values (1, 1, 1, 1, 1)")
	tk.MustExec(`set @@tidb_cost_model_version=2`)

	cases := []struct {
		query       string
		scanFormula string
	}{
		// index scan row-size on idx_ab is always equal to row-size(index_ab)
		{"select a from t use index(ab) where a=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		{"select a, b from t use index(ab) where a=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		{"select b from t use index(ab) where a=1 and b=1", "scan(1*logrowsize(32)*tikv_scan_factor(40.7))"},
		// index scan row-size on idx_abc is always equal to row-size(index_abc)
		{"select a from t use index(abc) where a=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a, b from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		{"select a, b, c from t use index(abc) where a=1 and b=1 and c=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
		// table scan row-size is always equal to row-size(*)
		{"select a from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
		{"select a, d from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
		{"select * from t use index(primary) where a=1", "scan(1*logrowsize(80)*tikv_scan_factor(40.7))"},
	}
	for _, c := range cases {
		rs := tk.MustQuery("explain analyze format=true_card_cost " + c.query).Rows()
		// The last row of the explain output is the scan operator; column 3
		// holds its cost formula.
		scan := rs[len(rs)-1]
		formula := scan[3]
		// require.Equal takes (t, expected, actual): pass the case's expected
		// formula first so failure messages label the two values correctly.
		require.Equal(t, c.scanFormula, formula)
	}

	tk.MustQuery("explain select a from t where a=1").Check(testkit.Rows(
		`IndexReader_6 10.00 root index:IndexRangeScan_5`, // use idx_ab automatically since it has the smallest row-size in all access paths.
		`└─IndexRangeScan_5 10.00 cop[tikv] table:t, index:ab(a, b) range:[1,1], keep order:false, stats:pseudo`))
	tk.MustQuery("explain select a, b, c from t where a=1").Check(testkit.Rows(
		`IndexReader_6 10.00 root index:IndexRangeScan_5`, // use idx_abc automatically
		`└─IndexRangeScan_5 10.00 cop[tikv] table:t, index:abc(a, b, c) range:[1,1], keep order:false, stats:pseudo`))
}

func TestCostModelTraceVer2(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
5 changes: 2 additions & 3 deletions planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,12 +297,11 @@ func (p *PhysicalIndexJoin) attach2Task(tasks ...task) task {
return t
}

func getAvgRowSize(stats *property.StatsInfo, schema *expression.Schema) (size float64) {
func getAvgRowSize(stats *property.StatsInfo, cols []*expression.Column) (size float64) {
if stats.HistColl != nil {
size = stats.HistColl.GetAvgRowSizeListInDisk(schema.Columns)
size = stats.HistColl.GetAvgRowSizeListInDisk(cols)
} else {
// Estimate using just the type info.
cols := schema.Columns
for _, col := range cols {
size += float64(chunk.EstimateTypeWidth(col.GetType()))
}
Expand Down
Loading

0 comments on commit ce62915

Please sign in to comment.