Skip to content

Commit e7afbb2

Browse files
authored
*: make analyze buckets number configurable (#7619)
1 parent 7c6c279 commit e7afbb2

11 files changed: +78 additions, −45 deletions

ast/stats.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ var (
2525
type AnalyzeTableStmt struct {
2626
stmtNode
2727

28-
TableNames []*TableName
29-
IndexNames []model.CIStr
28+
TableNames []*TableName
29+
IndexNames []model.CIStr
30+
MaxNumBuckets uint64
3031

3132
// IndexFlag is true when we only analyze indices for a table.
3233
IndexFlag bool

executor/analyze.go

+5-15
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ type AnalyzeExec struct {
4242
tasks []*analyzeTask
4343
}
4444

45-
var maxBucketSize = int64(256)
46-
4745
const (
4846
maxSampleSize = 10000
4947
maxRegionSampleSize = 1000
@@ -167,6 +165,7 @@ type AnalyzeIndexExec struct {
167165
priority int
168166
analyzePB *tipb.AnalyzeReq
169167
result distsql.SelectResult
168+
maxNumBuckets uint64
170169
}
171170

172171
func (e *AnalyzeIndexExec) open() error {
@@ -211,7 +210,7 @@ func (e *AnalyzeIndexExec) buildStats() (hist *statistics.Histogram, cms *statis
211210
if err != nil {
212211
return nil, nil, errors.Trace(err)
213212
}
214-
hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(maxBucketSize))
213+
hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, statistics.HistogramFromProto(resp.Hist), int(e.maxNumBuckets))
215214
if err != nil {
216215
return nil, nil, errors.Trace(err)
217216
}
@@ -255,6 +254,7 @@ type AnalyzeColumnsExec struct {
255254
keepOrder bool
256255
analyzePB *tipb.AnalyzeReq
257256
resultHandler *tableResultHandler
257+
maxNumBuckets uint64
258258
}
259259

260260
func (e *AnalyzeColumnsExec) open() error {
@@ -339,7 +339,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
339339
}
340340
sc := e.ctx.GetSessionVars().StmtCtx
341341
if e.pkInfo != nil {
342-
pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(maxBucketSize))
342+
pkHist, err = statistics.MergeHistograms(sc, pkHist, statistics.HistogramFromProto(resp.PkHist), int(e.maxNumBuckets))
343343
if err != nil {
344344
return nil, nil, errors.Trace(err)
345345
}
@@ -365,7 +365,7 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
365365
return nil, nil, errors.Trace(err)
366366
}
367367
}
368-
hg, err := statistics.BuildColumn(e.ctx, maxBucketSize, col.ID, collectors[i], &col.FieldType)
368+
hg, err := statistics.BuildColumn(e.ctx, int64(e.maxNumBuckets), col.ID, collectors[i], &col.FieldType)
369369
if err != nil {
370370
return nil, nil, errors.Trace(err)
371371
}
@@ -374,13 +374,3 @@ func (e *AnalyzeColumnsExec) buildStats() (hists []*statistics.Histogram, cms []
374374
}
375375
return hists, cms, nil
376376
}
377-
378-
// SetMaxBucketSizeForTest sets the `maxBucketSize`.
379-
func SetMaxBucketSizeForTest(size int64) {
380-
maxBucketSize = size
381-
}
382-
383-
// GetMaxBucketSizeForTest gets the `maxBucketSize`.
384-
func GetMaxBucketSizeForTest() int64 {
385-
return maxBucketSize
386-
}

executor/analyze_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,25 @@ PARTITION BY RANGE ( a ) (
6262
}
6363
}
6464
}
65+
66+
func (s *testSuite) TestAnalyzeParameters(c *C) {
67+
tk := testkit.NewTestKit(c, s.store)
68+
tk.MustExec("use test")
69+
tk.MustExec("drop table if exists t")
70+
tk.MustExec("create table t(a int)")
71+
for i := 0; i < 20; i++ {
72+
tk.MustExec(fmt.Sprintf("insert into t values (%d)", i))
73+
}
74+
75+
tk.MustExec("analyze table t")
76+
is := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
77+
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
78+
c.Assert(err, IsNil)
79+
tableInfo := table.Meta()
80+
tbl := s.domain.StatsHandle().GetTableStats(tableInfo)
81+
c.Assert(tbl.Columns[1].Len(), Equals, 20)
82+
83+
tk.MustExec("analyze table t with 4 buckets")
84+
tbl = s.domain.StatsHandle().GetTableStats(tableInfo)
85+
c.Assert(tbl.Columns[1].Len(), Equals, 4)
86+
}

executor/builder.go

+8-6
Original file line numberDiff line numberDiff line change
@@ -1318,7 +1318,7 @@ func (b *executorBuilder) buildDelete(v *plan.Delete) Executor {
13181318
return deleteExec
13191319
}
13201320

1321-
func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask) *AnalyzeIndexExec {
1321+
func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask, maxNumBuckets uint64) *AnalyzeIndexExec {
13221322
_, offset := zone(b.ctx)
13231323
e := &AnalyzeIndexExec{
13241324
ctx: b.ctx,
@@ -1331,9 +1331,10 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
13311331
Flags: statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
13321332
TimeZoneOffset: offset,
13331333
},
1334+
maxNumBuckets: maxNumBuckets,
13341335
}
13351336
e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
1336-
BucketSize: maxBucketSize,
1337+
BucketSize: int64(maxNumBuckets),
13371338
NumColumns: int32(len(task.IndexInfo.Columns)),
13381339
}
13391340
depth := int32(defaultCMSketchDepth)
@@ -1343,7 +1344,7 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plan.AnalyzeIndexTask)
13431344
return e
13441345
}
13451346

1346-
func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask) *AnalyzeColumnsExec {
1347+
func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTask, maxNumBuckets uint64) *AnalyzeColumnsExec {
13471348
cols := task.ColsInfo
13481349
keepOrder := false
13491350
if task.PKInfo != nil {
@@ -1365,11 +1366,12 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plan.AnalyzeColumnsTa
13651366
Flags: statementContextToFlags(b.ctx.GetSessionVars().StmtCtx),
13661367
TimeZoneOffset: offset,
13671368
},
1369+
maxNumBuckets: maxNumBuckets,
13681370
}
13691371
depth := int32(defaultCMSketchDepth)
13701372
width := int32(defaultCMSketchWidth)
13711373
e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
1372-
BucketSize: maxBucketSize,
1374+
BucketSize: int64(maxNumBuckets),
13731375
SampleSize: maxRegionSampleSize,
13741376
SketchSize: maxSketchSize,
13751377
ColumnsInfo: model.ColumnsToProto(cols, task.PKInfo != nil),
@@ -1388,7 +1390,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
13881390
for _, task := range v.ColTasks {
13891391
e.tasks = append(e.tasks, &analyzeTask{
13901392
taskType: colTask,
1391-
colExec: b.buildAnalyzeColumnsPushdown(task),
1393+
colExec: b.buildAnalyzeColumnsPushdown(task, v.MaxNumBuckets),
13921394
})
13931395
if b.err != nil {
13941396
b.err = errors.Trace(b.err)
@@ -1398,7 +1400,7 @@ func (b *executorBuilder) buildAnalyze(v *plan.Analyze) Executor {
13981400
for _, task := range v.IdxTasks {
13991401
e.tasks = append(e.tasks, &analyzeTask{
14001402
taskType: idxTask,
1401-
idxExec: b.buildAnalyzeIndexPushdown(task),
1403+
idxExec: b.buildAnalyzeIndexPushdown(task, v.MaxNumBuckets),
14021404
})
14031405
if b.err != nil {
14041406
b.err = errors.Trace(b.err)

parser/misc.go

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ var tokenMap = map[string]int{
165165
"BOOLEAN": booleanType,
166166
"BOTH": both,
167167
"BTREE": btree,
168+
"BUCKETS": buckets,
168169
"BY": by,
169170
"BYTE": byteType,
170171
"CANCEL": cancel,

parser/parser.y

+18-7
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ import (
434434

435435
/* The following tokens belong to TiDBKeyword. */
436436
admin "ADMIN"
437+
buckets "BUCKETS"
437438
cancel "CANCEL"
438439
ddl "DDL"
439440
jobs "JOBS"
@@ -666,6 +667,7 @@ import (
666667
LinesTerminated "Lines terminated by"
667668
LocalOpt "Local opt"
668669
LockClause "Alter table lock clause"
670+
MaxNumBuckets "Max number of buckets"
669671
NumLiteral "Num/Int/Float/Decimal Literal"
670672
NoWriteToBinLogAliasOpt "NO_WRITE_TO_BINLOG alias LOCAL or empty"
671673
ObjectType "Grant statement object type"
@@ -1225,14 +1227,23 @@ TableToTable:
12251227
/*******************************************************************************************/
12261228

12271229
AnalyzeTableStmt:
1228-
"ANALYZE" "TABLE" TableNameList
1230+
"ANALYZE" "TABLE" TableNameList MaxNumBuckets
12291231
{
1230-
$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName)}
1232+
$$ = &ast.AnalyzeTableStmt{TableNames: $3.([]*ast.TableName), MaxNumBuckets: $4.(uint64)}
12311233
}
1232-
| "ANALYZE" "TABLE" TableName "INDEX" IndexNameList
1233-
{
1234-
$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true}
1235-
}
1234+
| "ANALYZE" "TABLE" TableName "INDEX" IndexNameList MaxNumBuckets
1235+
{
1236+
$$ = &ast.AnalyzeTableStmt{TableNames: []*ast.TableName{$3.(*ast.TableName)}, IndexNames: $5.([]model.CIStr), IndexFlag: true, MaxNumBuckets: $6.(uint64)}
1237+
}
1238+
1239+
MaxNumBuckets:
1240+
{
1241+
$$ = uint64(0)
1242+
}
1243+
| "WITH" NUM "BUCKETS"
1244+
{
1245+
$$ = getUint64FromNUM($2)
1246+
}
12361247

12371248
/*******************************************************************************************/
12381249
Assignment:
@@ -2809,7 +2820,7 @@ UnReservedKeyword:
28092820

28102821

28112822
TiDBKeyword:
2812-
"ADMIN" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"
2823+
"ADMIN" | "BUCKETS" | "CANCEL" | "DDL" | "JOBS" | "JOB" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"
28132824

28142825
NotKeywordToken:
28152826
"ADDDATE" | "BIT_AND" | "BIT_OR" | "BIT_XOR" | "CAST" | "COPY" | "COUNT" | "CURTIME" | "DATE_ADD" | "DATE_SUB" | "EXTRACT" | "GET_FORMAT" | "GROUP_CONCAT"

parser/parser_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -2291,6 +2291,8 @@ func (s *testParserSuite) TestAnalyze(c *C) {
22912291
{"analyze table t1 index", true},
22922292
{"analyze table t1 index a", true},
22932293
{"analyze table t1 index a,b", true},
2294+
{"analyze table t with 4 buckets", true},
2295+
{"analyze table t index a with 4 buckets", true},
22942296
}
22952297
s.RunTest(c, table)
22962298
}

plan/cbo_test.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
. "github.com/pingcap/check"
2222
"github.com/pingcap/tidb/config"
2323
"github.com/pingcap/tidb/domain"
24-
"github.com/pingcap/tidb/executor"
2524
"github.com/pingcap/tidb/kv"
2625
"github.com/pingcap/tidb/plan"
2726
"github.com/pingcap/tidb/session"
@@ -671,10 +670,7 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) {
671670
for i := 0; i < 10; i++ {
672671
tk.MustExec("insert into t values (5,5,5), (10,10,10)")
673672
}
674-
origin := executor.GetMaxBucketSizeForTest()
675-
defer func() { executor.SetMaxBucketSizeForTest(origin) }()
676-
executor.SetMaxBucketSizeForTest(2)
677-
tk.MustExec("analyze table t")
673+
tk.MustExec("analyze table t with 2 buckets")
678674
// Force using the histogram to estimate.
679675
tk.MustExec("update mysql.stats_histograms set stats_ver = 0")
680676
dom.StatsHandle().Clear()

plan/common_plans.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,9 @@ type AnalyzeIndexTask struct {
373373
type Analyze struct {
374374
baseSchemaProducer
375375

376-
ColTasks []AnalyzeColumnsTask
377-
IdxTasks []AnalyzeIndexTask
376+
ColTasks []AnalyzeColumnsTask
377+
IdxTasks []AnalyzeIndexTask
378+
MaxNumBuckets uint64
378379
}
379380

380381
// LoadData represents a loaddata plan.

plan/planbuilder.go

+14-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"fmt"
1818
"strings"
1919

20+
"github.com/cznic/mathutil"
2021
"github.com/juju/errors"
2122
"github.com/pingcap/tidb/ast"
2223
"github.com/pingcap/tidb/expression"
@@ -632,7 +633,7 @@ func getPhysicalIDs(tblInfo *model.TableInfo) []int64 {
632633
}
633634

634635
func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
635-
p := &Analyze{}
636+
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
636637
for _, tbl := range as.TableNames {
637638
idxInfo, colInfo, pkInfo := getColsInfo(tbl)
638639
physicalIDs := getPhysicalIDs(tbl.TableInfo)
@@ -651,7 +652,7 @@ func (b *planBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) Plan {
651652
}
652653

653654
func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
654-
p := &Analyze{}
655+
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
655656
tblInfo := as.TableNames[0].TableInfo
656657
physicalIDs := getPhysicalIDs(tblInfo)
657658
for _, idxName := range as.IndexNames {
@@ -667,7 +668,7 @@ func (b *planBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error)
667668
}
668669

669670
func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan {
670-
p := &Analyze{}
671+
p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
671672
tblInfo := as.TableNames[0].TableInfo
672673
physicalIDs := getPhysicalIDs(tblInfo)
673674
for _, idx := range tblInfo.Indices {
@@ -680,7 +681,17 @@ func (b *planBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) Plan {
680681
return p
681682
}
682683

684+
const (
685+
defaultMaxNumBuckets = 256
686+
numBucketsLimit = 1024
687+
)
688+
683689
func (b *planBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) {
690+
if as.MaxNumBuckets == 0 {
691+
as.MaxNumBuckets = defaultMaxNumBuckets
692+
} else {
693+
as.MaxNumBuckets = mathutil.MinUint64(as.MaxNumBuckets, numBucketsLimit)
694+
}
684695
if as.IndexFlag {
685696
if len(as.IndexNames) == 0 {
686697
return b.buildAnalyzeAllIndex(as), nil

statistics/update_test.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020

2121
. "github.com/pingcap/check"
2222
"github.com/pingcap/tidb/domain"
23-
"github.com/pingcap/tidb/executor"
2423
"github.com/pingcap/tidb/kv"
2524
"github.com/pingcap/tidb/model"
2625
"github.com/pingcap/tidb/mysql"
@@ -870,17 +869,14 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
870869
oriMinLogCount := statistics.MinLogScanCount
871870
oriMinError := statistics.MinLogErrorRate
872871
oriLevel := log.GetLevel()
873-
oriBucketNum := executor.GetMaxBucketSizeForTest()
874872
oriLease := s.do.StatsHandle().Lease
875873
defer func() {
876874
statistics.FeedbackProbability = oriProbability
877875
statistics.MinLogScanCount = oriMinLogCount
878876
statistics.MinLogErrorRate = oriMinError
879-
executor.SetMaxBucketSizeForTest(oriBucketNum)
880877
s.do.StatsHandle().Lease = oriLease
881878
log.SetLevel(oriLevel)
882879
}()
883-
executor.SetMaxBucketSizeForTest(4)
884880
statistics.FeedbackProbability = 1
885881
statistics.MinLogScanCount = 0
886882
statistics.MinLogErrorRate = 0
@@ -892,7 +888,7 @@ func (s *testStatsUpdateSuite) TestLogDetailedInfo(c *C) {
892888
for i := 0; i < 20; i++ {
893889
testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d, %d)", i, i, i))
894890
}
895-
testKit.MustExec("analyze table t")
891+
testKit.MustExec("analyze table t with 4 buckets")
896892
tests := []struct {
897893
sql string
898894
result string

0 commit comments

Comments (0)