diff --git a/executor/analyze.go b/executor/analyze.go
index c159b82313206..05ae1c5ffc3e9 100644
--- a/executor/analyze.go
+++ b/executor/analyze.go
@@ -29,6 +29,7 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/failpoint"
 	"github.com/pingcap/kvproto/pkg/debugpb"
+	"github.com/pingcap/parser/ast"
 	"github.com/pingcap/parser/model"
 	"github.com/pingcap/parser/mysql"
 	"github.com/pingcap/tidb/distsql"
@@ -70,10 +71,8 @@ var (
 )
 
 const (
-	maxRegionSampleSize  = 1000
-	maxSketchSize        = 10000
-	defaultCMSketchDepth = 5
-	defaultCMSketchWidth = 2048
+	maxRegionSampleSize = 1000
+	maxSketchSize       = 10000
 )
 
 // Next implements the Executor Next interface.
@@ -252,7 +251,7 @@ type AnalyzeIndexExec struct {
 	analyzePB    *tipb.AnalyzeReq
 	result       distsql.SelectResult
 	countNullRes distsql.SelectResult
-	maxNumBuckets uint64
+	opts         map[ast.AnalyzeOptionType]uint64
 	job          *statistics.AnalyzeJob
 }
@@ -307,7 +306,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee
 	hist := &statistics.Histogram{}
 	var cms *statistics.CMSketch
 	if needCMS {
-		cms = statistics.NewCMSketch(defaultCMSketchDepth, defaultCMSketchWidth)
+		cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]))
 	}
 	for {
 		data, err := result.NextRaw(context.TODO())
@@ -324,7 +323,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee
 		}
 		respHist := statistics.HistogramFromProto(resp.Hist)
 		e.job.Update(int64(respHist.TotalRowCount()))
-		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.maxNumBuckets))
+		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
 		if err != nil {
 			return nil, nil, err
 		}
@@ -401,7 +400,7 @@ type AnalyzeColumnsExec struct {
 	priority      int
 	analyzePB     *tipb.AnalyzeReq
 	resultHandler *tableResultHandler
-	maxNumBuckets uint64
+	opts          map[ast.AnalyzeOptionType]uint64
 	job           *statistics.AnalyzeJob
 }
@@ -465,7 +464,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis
 				IsMerger:      true,
 				FMSketch:      statistics.NewFMSketch(maxSketchSize),
 				MaxSampleSize: int64(MaxSampleSize),
-				CMSketch:      statistics.NewCMSketch(defaultCMSketchDepth, defaultCMSketchWidth),
+				CMSketch:      statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])),
 			}
 		}
 		for {
@@ -486,7 +485,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis
 		if e.pkInfo != nil {
 			respHist := statistics.HistogramFromProto(resp.PkHist)
 			rowCount = int64(respHist.TotalRowCount())
-			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.maxNumBuckets))
+			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
			if err != nil {
				return nil, nil, err
			}
@@ -516,7 +515,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range) (hists []*statis
 				return nil, nil, err
 			}
 		}
-		hg, err := statistics.BuildColumn(e.ctx, int64(e.maxNumBuckets), col.ID, collectors[i], &col.FieldType)
+		hg, err := statistics.BuildColumn(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), col.ID, collectors[i], &col.FieldType)
 		if err != nil {
 			return nil, nil, err
 		}
@@ -591,7 +590,7 @@ type AnalyzeFastExec struct {
 	colsInfo    []*model.ColumnInfo
 	idxsInfo    []*model.IndexInfo
 	concurrency int
-	maxNumBuckets uint64
+	opts        map[ast.AnalyzeOptionType]uint64
 	tblInfo     *model.TableInfo
 	cache       *tikv.RegionCache
 	wg          *sync.WaitGroup
@@ -1006,9 +1005,9 @@ func (e *AnalyzeFastExec) buildColumnStats(ID int64, collector *statistics.Sampl
 		data = append(data, bytes)
 	}
 	// Build CMSketch.
-	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data, 20, uint64(rowCount))
+	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data, uint32(e.opts[ast.AnalyzeOptNumTopN]), uint64(rowCount))
 	// Build Histogram.
-	hist, err := statistics.BuildColumnHist(e.ctx, int64(e.maxNumBuckets), ID, collector, tp, rowCount, int64(ndv), collector.NullCount*int64(scaleRatio))
+	hist, err := statistics.BuildColumnHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), ID, collector, tp, rowCount, int64(ndv), collector.NullCount*int64(scaleRatio))
 	return hist, cmSketch, err
 }
@@ -1029,20 +1028,20 @@ func (e *AnalyzeFastExec) buildIndexStats(idxInfo *model.IndexInfo, collector *s
 			data[i] = append(data[i], sample.Value.GetBytes()[:preLen])
 		}
 	}
-	numTop := uint32(20)
-	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data[0], numTop, uint64(rowCount))
+	numTop := uint32(e.opts[ast.AnalyzeOptNumTopN])
+	cmSketch, ndv, scaleRatio := statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[0], numTop, uint64(rowCount))
 	// Build CM Sketch for each prefix and merge them into one.
 	for i := 1; i < len(idxInfo.Columns); i++ {
 		var curCMSketch *statistics.CMSketch
 		// `ndv` should be the ndv of full index, so just rewrite it here.
-		curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(defaultCMSketchDepth, defaultCMSketchWidth, data[i], numTop, uint64(rowCount))
+		curCMSketch, ndv, scaleRatio = statistics.NewCMSketchWithTopN(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]), data[i], numTop, uint64(rowCount))
 		err := cmSketch.MergeCMSketch(curCMSketch, numTop)
 		if err != nil {
 			return nil, nil, err
 		}
 	}
 	// Build Histogram.
-	hist, err := statistics.BuildColumnHist(e.ctx, int64(e.maxNumBuckets), idxInfo.ID, collector, types.NewFieldType(mysql.TypeBlob), rowCount, int64(ndv), collector.NullCount*int64(scaleRatio))
+	hist, err := statistics.BuildColumnHist(e.ctx, int64(e.opts[ast.AnalyzeOptNumBuckets]), idxInfo.ID, collector, types.NewFieldType(mysql.TypeBlob), rowCount, int64(ndv), collector.NullCount*int64(scaleRatio))
 	return hist, cmSketch, err
 }
@@ -1209,7 +1208,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
-	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.maxNumBuckets))
+	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets]))
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
@@ -1252,7 +1251,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
 	hist := hists[0]
-	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.maxNumBuckets))
+	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets]))
 	if err != nil {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
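Note on the executor changes above: every tuning knob is now read with a plain map lookup such as int32(e.opts[ast.AnalyzeOptCMSketchDepth]). A Go map lookup returns the zero value for a missing key, so an opts map that was never seeded with defaults would silently produce a zero-sized sketch instead of an error; the planner therefore always fills in defaults before handing the map over (see handleAnalyzeOptions in planbuilder.go further down). A minimal standalone sketch of that failure mode, using a stand-in key type rather than the real ast.AnalyzeOptionType:

package main

import "fmt"

// Stand-in for ast.AnalyzeOptionType; the real keys live in pingcap/parser.
type analyzeOptionType int

const (
	optNumBuckets analyzeOptionType = iota
	optCMSketchDepth
)

func main() {
	// Mimics an opts map whose defaults were never seeded: depth is absent.
	opts := map[analyzeOptionType]uint64{optNumBuckets: 256}
	// A missing key yields the zero value, not an error, so this prints 0,
	// which would size the CM sketch to nothing rather than fail loudly.
	fmt.Println(int32(opts[optCMSketchDepth]))
}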
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
index ce0f579e56757..e7bc61e02f89b 100644
--- a/executor/analyze_test.go
+++ b/executor/analyze_test.go
@@ -123,18 +123,31 @@ func (s *testSuite1) TestAnalyzeParameters(c *C) {
 	for i := 0; i < 20; i++ {
 		tk.MustExec(fmt.Sprintf("insert into t values (%d)", i))
 	}
+	tk.MustExec("insert into t values (19), (19), (19)")
+	tk.MustExec("set @@tidb_enable_fast_analyze = 1")
+	executor.MaxSampleSize = 30
 	tk.MustExec("analyze table t")
 	is := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
 	table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
 	c.Assert(err, IsNil)
 	tableInfo := table.Meta()
 	tbl := s.dom.StatsHandle().GetTableStats(tableInfo)
-	c.Assert(tbl.Columns[1].Len(), Equals, 20)
-
-	tk.MustExec("analyze table t with 4 buckets")
+	col := tbl.Columns[1]
+	c.Assert(col.Len(), Equals, 20)
+	c.Assert(len(col.CMSketch.TopN()), Equals, 20)
+	width, depth := col.CMSketch.GetWidthAndDepth()
+	c.Assert(depth, Equals, int32(5))
+	c.Assert(width, Equals, int32(2048))
+
+	tk.MustExec("analyze table t with 4 buckets, 1 topn, 4 cmsketch width, 4 cmsketch depth")
 	tbl = s.dom.StatsHandle().GetTableStats(tableInfo)
-	c.Assert(tbl.Columns[1].Len(), Equals, 4)
+	col = tbl.Columns[1]
+	c.Assert(col.Len(), Equals, 4)
+	c.Assert(len(col.CMSketch.TopN()), Equals, 1)
+	width, depth = col.CMSketch.GetWidthAndDepth()
+	c.Assert(depth, Equals, int32(4))
+	c.Assert(width, Equals, int32(4))
 }
 
 func (s *testSuite1) TestAnalyzeTooLongColumns(c *C) {
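Note on the test above: "analyze table t with 4 buckets, 1 topn, 4 cmsketch width, 4 cmsketch depth" exercises the new WITH clause added by the parser bump in go.mod below; any option left out of the clause keeps its default. A self-contained sketch of the default-then-override merge performed by handleAnalyzeOptions (planbuilder.go, further down), with stand-in types in place of ast.AnalyzeOpt and validation elided:

package main

import "fmt"

type analyzeOptionType int

const (
	optNumBuckets analyzeOptionType = iota
	optNumTopN
	optCMSketchWidth
	optCMSketchDepth
)

// Stand-in for ast.AnalyzeOpt: one parsed item of the WITH clause.
type analyzeOpt struct {
	Type  analyzeOptionType
	Value uint64
}

var defaults = map[analyzeOptionType]uint64{
	optNumBuckets:    256,
	optNumTopN:       20,
	optCMSketchWidth: 2048,
	optCMSketchDepth: 5,
}

// mergeOpts mirrors handleAnalyzeOptions: start from the defaults, then let
// explicit WITH-clause options override them.
func mergeOpts(explicit []analyzeOpt) map[analyzeOptionType]uint64 {
	m := make(map[analyzeOptionType]uint64, len(defaults))
	for k, v := range defaults {
		m[k] = v
	}
	for _, o := range explicit {
		m[o.Type] = o.Value
	}
	return m
}

func main() {
	// "analyze table t with 4 buckets, 1 topn" leaves both sketch dimensions
	// at their defaults.
	m := mergeOpts([]analyzeOpt{{optNumBuckets, 4}, {optNumTopN, 1}})
	fmt.Println(m[optNumBuckets], m[optNumTopN], m[optCMSketchWidth], m[optCMSketchDepth]) // 4 1 2048 5
}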
diff --git a/executor/builder.go b/executor/builder.go
index d2d048616bd93..cb77e13ed179e 100644
--- a/executor/builder.go
+++ b/executor/builder.go
@@ -1408,7 +1408,7 @@ func (b *executorBuilder) buildDelete(v *plannercore.Delete) Executor {
 	return deleteExec
 }
 
-func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, maxNumBuckets uint64, autoAnalyze string) *analyzeTask {
+func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string) *analyzeTask {
 	_, offset := timeutil.Zone(b.ctx.GetSessionVars().Location())
 	sc := b.ctx.GetSessionVars().StmtCtx
 	e := &AnalyzeIndexExec{
@@ -1422,24 +1422,24 @@ func (b *executorBuilder) buildAnalyzeIndexPushdown(task plannercore.AnalyzeInde
 			Flags:          sc.PushDownFlags(),
 			TimeZoneOffset: offset,
 		},
-		maxNumBuckets: maxNumBuckets,
+		opts: opts,
 	}
 	e.analyzePB.IdxReq = &tipb.AnalyzeIndexReq{
-		BucketSize: int64(maxNumBuckets),
+		BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
 		NumColumns: int32(len(task.IndexInfo.Columns)),
 	}
-	depth := int32(defaultCMSketchDepth)
-	width := int32(defaultCMSketchWidth)
+	depth := int32(opts[ast.AnalyzeOptCMSketchDepth])
+	width := int32(opts[ast.AnalyzeOptCMSketchWidth])
 	e.analyzePB.IdxReq.CmsketchDepth = &depth
 	e.analyzePB.IdxReq.CmsketchWidth = &width
 	job := &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: autoAnalyze + "analyze index " + task.IndexInfo.Name.O}
 	return &analyzeTask{taskType: idxTask, idxExec: e, job: job}
 }
 
-func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, maxNumBuckets uint64) *analyzeTask {
+func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask {
 	h := domain.GetDomain(b.ctx).StatsHandle()
 	statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.PhysicalTableID)
-	analyzeTask := b.buildAnalyzeIndexPushdown(task, maxNumBuckets, "")
+	analyzeTask := b.buildAnalyzeIndexPushdown(task, opts, "")
 	if statsTbl.Pseudo {
 		return analyzeTask
 	}
@@ -1470,7 +1470,7 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI
 	return analyzeTask
 }
 
-func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64, autoAnalyze string) *analyzeTask {
+func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64, autoAnalyze string) *analyzeTask {
 	cols := task.ColsInfo
 	if task.PKInfo != nil {
 		cols = append([]*model.ColumnInfo{task.PKInfo}, cols...)
@@ -1490,12 +1490,12 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo
 			Flags:          sc.PushDownFlags(),
 			TimeZoneOffset: offset,
 		},
-		maxNumBuckets: maxNumBuckets,
+		opts: opts,
 	}
-	depth := int32(defaultCMSketchDepth)
-	width := int32(defaultCMSketchWidth)
+	depth := int32(opts[ast.AnalyzeOptCMSketchDepth])
+	width := int32(opts[ast.AnalyzeOptCMSketchWidth])
 	e.analyzePB.ColReq = &tipb.AnalyzeColumnsReq{
-		BucketSize: int64(maxNumBuckets),
+		BucketSize: int64(opts[ast.AnalyzeOptNumBuckets]),
 		SampleSize:  maxRegionSampleSize,
 		SketchSize:  maxSketchSize,
 		ColumnsInfo: model.ColumnsToProto(cols, task.PKInfo != nil),
@@ -1507,10 +1507,10 @@ func (b *executorBuilder) buildAnalyzeColumnsPushdown(task plannercore.AnalyzeCo
 	return &analyzeTask{taskType: colTask, colExec: e, job: job}
 }
 
-func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64) *analyzeTask {
+func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) *analyzeTask {
 	h := domain.GetDomain(b.ctx).StatsHandle()
 	statsTbl := h.GetPartitionStats(&model.TableInfo{}, task.PhysicalTableID)
-	analyzeTask := b.buildAnalyzeColumnsPushdown(task, maxNumBuckets, "")
+	analyzeTask := b.buildAnalyzeColumnsPushdown(task, opts, "")
 	if statsTbl.Pseudo {
 		return analyzeTask
 	}
@@ -1541,7 +1541,7 @@ func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColu
 	return analyzeTask
 }
 
-func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, maxNumBuckets uint64) {
+func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercore.AnalyzeColumnsTask, opts map[ast.AnalyzeOptionType]uint64) {
 	findTask := false
 	for _, eTask := range e.tasks {
 		if eTask.fastExec.physicalTableID == task.PhysicalTableID {
@@ -1563,7 +1563,7 @@ func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercor
 			physicalTableID: task.PhysicalTableID,
 			colsInfo:        task.ColsInfo,
 			pkInfo:          task.PKInfo,
-			maxNumBuckets:   maxNumBuckets,
+			opts:            opts,
 			tblInfo:         task.TblInfo,
 			concurrency:     concurrency,
 			wg:              &sync.WaitGroup{},
@@ -1573,7 +1573,7 @@ func (b *executorBuilder) buildAnalyzeFastColumn(e *AnalyzeExec, task plannercor
 	}
 }
 
-func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, maxNumBuckets uint64) {
+func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore.AnalyzeIndexTask, opts map[ast.AnalyzeOptionType]uint64) {
 	findTask := false
 	for _, eTask := range e.tasks {
 		if eTask.fastExec.physicalTableID == task.PhysicalTableID {
@@ -1594,7 +1594,7 @@ func (b *executorBuilder) buildAnalyzeFastIndex(e *AnalyzeExec, task plannercore
 			ctx:             b.ctx,
 			physicalTableID: task.PhysicalTableID,
 			idxsInfo:        []*model.IndexInfo{task.IndexInfo},
-			maxNumBuckets:   maxNumBuckets,
+			opts:            opts,
 			tblInfo:         task.TblInfo,
 			concurrency:     concurrency,
 			wg:              &sync.WaitGroup{},
@@ -1617,12 +1617,12 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) Executor {
 	}
 	for _, task := range v.ColTasks {
 		if task.Incremental {
-			e.tasks = append(e.tasks, b.buildAnalyzePKIncremental(task, v.MaxNumBuckets))
+			e.tasks = append(e.tasks, b.buildAnalyzePKIncremental(task, v.Opts))
 		} else {
 			if enableFastAnalyze {
-				b.buildAnalyzeFastColumn(e, task, v.MaxNumBuckets)
+				b.buildAnalyzeFastColumn(e, task, v.Opts)
 			} else {
-				e.tasks = append(e.tasks, b.buildAnalyzeColumnsPushdown(task, v.MaxNumBuckets, autoAnalyze))
+				e.tasks = append(e.tasks, b.buildAnalyzeColumnsPushdown(task, v.Opts, autoAnalyze))
 			}
 		}
 		if b.err != nil {
@@ -1631,12 +1631,12 @@ func (b *executorBuilder) buildAnalyze(v *plannercore.Analyze) Executor {
 	}
 	for _, task := range v.IdxTasks {
 		if task.Incremental {
-			e.tasks = append(e.tasks, b.buildAnalyzeIndexIncremental(task, v.MaxNumBuckets))
+			e.tasks = append(e.tasks, b.buildAnalyzeIndexIncremental(task, v.Opts))
 		} else {
 			if enableFastAnalyze {
-				b.buildAnalyzeFastIndex(e, task, v.MaxNumBuckets)
+				b.buildAnalyzeFastIndex(e, task, v.Opts)
 			} else {
-				e.tasks = append(e.tasks, b.buildAnalyzeIndexPushdown(task, v.MaxNumBuckets, autoAnalyze))
+				e.tasks = append(e.tasks, b.buildAnalyzeIndexPushdown(task, v.Opts, autoAnalyze))
 			}
 		}
 		if b.err != nil {
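Note on the builder changes above: tipb.AnalyzeIndexReq and tipb.AnalyzeColumnsReq carry the sketch dimensions as *int32 fields, and Go cannot take the address of a conversion expression, which is why each option is first copied into an addressable local (depth := int32(opts[...]); req.CmsketchDepth = &depth). A minimal illustration with a hypothetical stand-in request type:

package main

import "fmt"

// Stand-in for the tipb request: pointer fields, as in the real proto structs.
type analyzeIndexReq struct {
	CmsketchDepth *int32
	CmsketchWidth *int32
}

func main() {
	opts := map[string]uint64{"depth": 5, "width": 2048}
	// req.CmsketchDepth = &int32(opts["depth"]) would not compile: the
	// converted value must first land in an addressable variable.
	depth := int32(opts["depth"])
	width := int32(opts["width"])
	req := analyzeIndexReq{CmsketchDepth: &depth, CmsketchWidth: &width}
	fmt.Println(*req.CmsketchDepth, *req.CmsketchWidth) // 5 2048
}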
diff --git a/executor/executor_test.go b/executor/executor_test.go
index 762f31a7faee1..139bb7f8b2629 100644
--- a/executor/executor_test.go
+++ b/executor/executor_test.go
@@ -2714,6 +2714,7 @@ func (s *testSuite1) SetUpSuite(c *C) {
 		mockstore.WithHijackClient(hijackClient),
 	)
 	c.Assert(err, IsNil)
+	session.SetStatsLease(0)
 	s.dom, err = session.BootstrapSession(s.store)
 	c.Assert(err, IsNil)
 	s.dom.SetStatsUpdating(true)
diff --git a/go.mod b/go.mod
index ba19c42dd4d89..70745f85587d5 100644
--- a/go.mod
+++ b/go.mod
@@ -41,7 +41,7 @@ require (
 	github.com/pingcap/goleveldb v0.0.0-20171020122428-b9ff6c35079e
 	github.com/pingcap/kvproto v0.0.0-20190703131923-d9830856b531
 	github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596
-	github.com/pingcap/parser v0.0.0-20190719041739-ff945b25f903
+	github.com/pingcap/parser v0.0.0-20190723083556-57e1f3b7a1c1
 	github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b
 	github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible
 	github.com/pingcap/tipb v0.0.0-20190428032612-535e1abaa330
diff --git a/go.sum b/go.sum
index 6d6d387061618..ca2f017ad302e 100644
--- a/go.sum
+++ b/go.sum
@@ -165,8 +165,8 @@ github.com/pingcap/kvproto v0.0.0-20190703131923-d9830856b531/go.mod h1:QMdbTAXC
 github.com/pingcap/log v0.0.0-20190214045112-b37da76f67a7/go.mod h1:xsfkWVaFVV5B8e1K9seWfyJWFrIhbtUTAD8NV1Pq3+w=
 github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ=
 github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw=
-github.com/pingcap/parser v0.0.0-20190719041739-ff945b25f903 h1:mRZH1M//ZhlpJ9ByB6TyEFErVO5vsfeWyA8a0SklkF0=
-github.com/pingcap/parser v0.0.0-20190719041739-ff945b25f903/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
+github.com/pingcap/parser v0.0.0-20190723083556-57e1f3b7a1c1 h1:/L2n0wamoKiRlXOn7xCNk8ejgXJbjmC3X54pGYSgPvQ=
+github.com/pingcap/parser v0.0.0-20190723083556-57e1f3b7a1c1/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA=
 github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b h1:oS9PftxQqgcRouKhhdaB52tXhVLEP7Ng3Qqsd6Z18iY=
 github.com/pingcap/pd v0.0.0-20190712044914-75a1f9f3062b/go.mod h1:3DlDlFT7EF64A1bmb/tulZb6wbPSagm5G4p1AlhaEDs=
 github.com/pingcap/tidb-tools v2.1.3-0.20190321065848-1e8b48f5c168+incompatible h1:MkWCxgZpJBgY2f4HtwWMMFzSBb3+JPzeJgF3VrXE/bU=
diff --git a/planner/core/common_plans.go b/planner/core/common_plans.go
index 8cd2290e6682b..0cf8a357a5716 100644
--- a/planner/core/common_plans.go
+++ b/planner/core/common_plans.go
@@ -495,9 +495,9 @@ type AnalyzeIndexTask struct {
 type Analyze struct {
 	baseSchemaProducer
 
-	ColTasks      []AnalyzeColumnsTask
-	IdxTasks      []AnalyzeIndexTask
-	MaxNumBuckets uint64
+	ColTasks []AnalyzeColumnsTask
+	IdxTasks []AnalyzeIndexTask
+	Opts     map[ast.AnalyzeOptionType]uint64
 }
 
 // LoadData represents a loaddata plan.
diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go
index b4ae31c8bae6d..48b9a76669577 100644
--- a/planner/core/planbuilder.go
+++ b/planner/core/planbuilder.go
@@ -16,10 +16,10 @@ package core
 import (
 	"bytes"
 	"context"
+	"encoding/binary"
 	"fmt"
 	"strings"
 
-	"github.com/cznic/mathutil"
 	"github.com/pingcap/errors"
 	"github.com/pingcap/parser"
 	"github.com/pingcap/parser/ast"
@@ -30,6 +30,7 @@ import (
 	"github.com/pingcap/tidb/ddl"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/infoschema"
+	"github.com/pingcap/tidb/kv"
 	"github.com/pingcap/tidb/planner/property"
 	"github.com/pingcap/tidb/sessionctx"
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
@@ -821,8 +822,8 @@ func getPhysicalIDsAndPartitionNames(tblInfo *model.TableInfo, partitionNames []
 	return ids, names, nil
 }
 
-func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error) {
-	p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
+func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) {
+	p := &Analyze{Opts: opts}
 	for _, tbl := range as.TableNames {
 		if tbl.TableInfo.IsView() {
 			return nil, errors.Errorf("analyze %s is not supported now.", tbl.Name.O)
@@ -857,8 +858,8 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt) (Plan, error)
 	return p, nil
 }
 
-func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
-	p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
+func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) {
+	p := &Analyze{Opts: opts}
 	tblInfo := as.TableNames[0].TableInfo
 	physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames)
 	if err != nil {
@@ -885,8 +886,8 @@ func (b *PlanBuilder) buildAnalyzeIndex(as *ast.AnalyzeTableStmt) (Plan, error)
 	return p, nil
 }
 
-func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, error) {
-	p := &Analyze{MaxNumBuckets: as.MaxNumBuckets}
+func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt, opts map[ast.AnalyzeOptionType]uint64) (Plan, error) {
+	p := &Analyze{Opts: opts}
 	tblInfo := as.TableNames[0].TableInfo
 	physicalIDs, names, err := getPhysicalIDsAndPartitionNames(tblInfo, as.PartitionNames)
 	if err != nil {
@@ -910,10 +911,44 @@ func (b *PlanBuilder) buildAnalyzeAllIndex(as *ast.AnalyzeTableStmt) (Plan, erro
 	return p, nil
 }
 
-const (
-	defaultMaxNumBuckets = 256
-	numBucketsLimit      = 1024
-)
+var cmSketchSizeLimit = kv.TxnEntrySizeLimit / binary.MaxVarintLen32
+
+var analyzeOptionLimit = map[ast.AnalyzeOptionType]uint64{
+	ast.AnalyzeOptNumBuckets:    1024,
+	ast.AnalyzeOptNumTopN:       1024,
+	ast.AnalyzeOptCMSketchWidth: uint64(cmSketchSizeLimit),
+	ast.AnalyzeOptCMSketchDepth: uint64(cmSketchSizeLimit),
+}
+
+var analyzeOptionDefault = map[ast.AnalyzeOptionType]uint64{
+	ast.AnalyzeOptNumBuckets:    256,
+	ast.AnalyzeOptNumTopN:       20,
+	ast.AnalyzeOptCMSketchWidth: 2048,
+	ast.AnalyzeOptCMSketchDepth: 5,
+}
+
+func handleAnalyzeOptions(opts []ast.AnalyzeOpt) (map[ast.AnalyzeOptionType]uint64, error) {
+	optMap := make(map[ast.AnalyzeOptionType]uint64, len(analyzeOptionDefault))
+	for key, val := range analyzeOptionDefault {
+		optMap[key] = val
+	}
+	for _, opt := range opts {
+		if opt.Type == ast.AnalyzeOptNumTopN {
+			if opt.Value > analyzeOptionLimit[opt.Type] {
+				return nil, errors.Errorf("value of analyze option %s should not be larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type])
+			}
+		} else {
+			if opt.Value == 0 || opt.Value > analyzeOptionLimit[opt.Type] {
+				return nil, errors.Errorf("value of analyze option %s should be positive and not larger than %d", ast.AnalyzeOptionString[opt.Type], analyzeOptionLimit[opt.Type])
+			}
+		}
+		optMap[opt.Type] = opt.Value
+	}
+	if optMap[ast.AnalyzeOptCMSketchWidth]*optMap[ast.AnalyzeOptCMSketchDepth] > uint64(cmSketchSizeLimit) {
+		return nil, errors.Errorf("cm sketch size (depth * width) should not be larger than %d", cmSketchSizeLimit)
+	}
+	return optMap, nil
+}
 
 func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) {
 	// If enable fast analyze, the storage must be tikv.Storage.
@@ -930,18 +965,17 @@ func (b *PlanBuilder) buildAnalyze(as *ast.AnalyzeTableStmt) (Plan, error) {
 		b.visitInfo = appendVisitInfo(b.visitInfo, mysql.InsertPriv, tbl.Schema.O, tbl.Name.O, "", insertErr)
 		b.visitInfo = appendVisitInfo(b.visitInfo, mysql.SelectPriv, tbl.Schema.O, tbl.Name.O, "", selectErr)
 	}
-	if as.MaxNumBuckets == 0 {
-		as.MaxNumBuckets = defaultMaxNumBuckets
-	} else {
-		as.MaxNumBuckets = mathutil.MinUint64(as.MaxNumBuckets, numBucketsLimit)
+	opts, err := handleAnalyzeOptions(as.AnalyzeOpts)
+	if err != nil {
+		return nil, err
 	}
 	if as.IndexFlag {
 		if len(as.IndexNames) == 0 {
-			return b.buildAnalyzeAllIndex(as)
+			return b.buildAnalyzeAllIndex(as, opts)
 		}
-		return b.buildAnalyzeIndex(as)
+		return b.buildAnalyzeIndex(as, opts)
 	}
-	return b.buildAnalyzeTable(as)
+	return b.buildAnalyzeTable(as, opts)
 }
 
 func buildShowNextRowID() *expression.Schema {
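Note on the new limits in planbuilder.go: cmSketchSizeLimit is kv.TxnEntrySizeLimit / binary.MaxVarintLen32, i.e. the largest depth*width whose counters are guaranteed to fit in a single transaction entry even when every counter needs a maximal 5-byte varint. The arithmetic below assumes the 6 MiB kv.TxnEntrySizeLimit TiDB used at the time; that constant is an assumption to verify against kv/kv.go, while the derivation itself mirrors the code above:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	const txnEntrySizeLimit = 6 * 1024 * 1024 // assumed value of kv.TxnEntrySizeLimit
	limit := txnEntrySizeLimit / binary.MaxVarintLen32
	// Prints 1258291: the cap handleAnalyzeOptions applies to depth*width.
	fmt.Println(limit)
	// The default 5 x 2048 sketch uses 10240 counters, far under the cap,
	// while e.g. "with 2048 cmsketch width, 1024 cmsketch depth" (2097152
	// counters) would be rejected even though each dimension alone is legal.
}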