diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel new file mode 100644 index 0000000000000..6997f6ceb6d1e --- /dev/null +++ b/pkg/statistics/BUILD.bazel @@ -0,0 +1,108 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "statistics", + srcs = [ + "analyze.go", + "analyze_jobs.go", + "builder.go", + "builder_ext_stats.go", + "cmsketch.go", + "cmsketch_util.go", + "column.go", + "debugtrace.go", + "estimate.go", + "fmsketch.go", + "histogram.go", + "index.go", + "row_sampler.go", + "sample.go", + "scalar.go", + "table.go", + ], + importpath = "github.com/pingcap/tidb/pkg/statistics", + visibility = ["//visibility:public"], + deps = [ + "//pkg/expression", + "//pkg/kv", + "//pkg/parser/ast", + "//pkg/parser/charset", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/parser/terror", + "//pkg/planner/util/debugtrace", + "//pkg/sessionctx", + "//pkg/sessionctx/stmtctx", + "//pkg/sessionctx/variable", + "//pkg/statistics/handle/logutil", + "//pkg/tablecodec", + "//pkg/types", + "//pkg/util/chunk", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/dbterror", + "//pkg/util/fastrand", + "//pkg/util/hack", + "//pkg/util/intest", + "//pkg/util/logutil", + "//pkg/util/memory", + "//pkg/util/ranger", + "//pkg/util/sqlexec", + "@com_github_dolthub_swiss//:swiss", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_tipb//go-tipb", + "@com_github_twmb_murmur3//:murmur3", + "@org_golang_x_exp//maps", + "@org_uber_go_atomic//:atomic", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "statistics_test", + timeout = "short", + srcs = [ + "bench_daily_test.go", + "builder_test.go", + "cmsketch_test.go", + "fmsketch_test.go", + "histogram_bench_test.go", + "histogram_test.go", + "integration_test.go", + "main_test.go", + "sample_test.go", + "scalar_test.go", + "statistics_test.go", + ], + data = glob(["testdata/**"]), + embed = [":statistics"], + flaky = True, + shard_count = 35, + deps = [ + "//pkg/config", + "//pkg/parser/ast", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/sessionctx", + "//pkg/sessionctx/stmtctx", + "//pkg/statistics/handle/autoanalyze", + "//pkg/testkit", + "//pkg/testkit/testdata", + "//pkg/testkit/testmain", + "//pkg/testkit/testsetup", + "//pkg/types", + "//pkg/util/benchdaily", + "//pkg/util/chunk", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/memory", + "//pkg/util/mock", + "//pkg/util/ranger", + "//pkg/util/sqlexec", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/statistics/builder.go b/statistics/builder.go index ec116803e952d..ffd5e798a37d5 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -374,6 +374,7 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } +<<<<<<< HEAD:statistics/builder.go // For debugging invalid sample data. var ( foundTwice bool @@ -417,12 +418,15 @@ func BuildHistAndTopN( continue } } +======= +>>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)):pkg/statistics/builder.go } for i := 0; i < len(topNList); i++ { topNList[i].Count *= uint64(sampleFactor) } topn := &TopN{TopN: topNList} + topn.Scale(sampleFactor) if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) { // TopN includes all sample data diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 9406d9eb7a5b2..0757e7a0a3d4d 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -495,6 +495,13 @@ type TopN struct { TopN []TopNMeta } +// Scale scales the TopN by the given factor. +func (c *TopN) Scale(scaleFactor float64) { + for i := range c.TopN { + c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor) + } +} + // AppendTopN appends a topn into the TopN struct. func (c *TopN) AppendTopN(data []byte, count uint64) { if c == nil { diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go index 1585342d8826b..8f9bccf3cfb56 100644 --- a/statistics/cmsketch_test.go +++ b/statistics/cmsketch_test.go @@ -390,3 +390,23 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) { require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55") require.Len(t, leftTopN, 1, "should have 1 left topN") } + +func TestTopNScale(t *testing.T) { + for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} { + var data []TopNMeta + sumCount := uint64(0) + for i := 0; i < 20; i++ { + cnt := uint64(rand.Intn(100000)) + data = append(data, TopNMeta{ + Count: cnt, + }) + sumCount += cnt + } + topN := TopN{TopN: data} + topN.Scale(scaleFactor) + scaleCount := float64(sumCount) * scaleFactor + delta := math.Abs(float64(topN.TotalCount()) - scaleCount) + roundErrorRatio := delta / scaleCount + require.Less(t, roundErrorRatio, 0.0001) + } +}