Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#49808
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
qw4990 authored and ti-chi-bot committed Dec 27, 2023
1 parent 81e0e2b commit 114d58c
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 0 deletions.
108 changes: 108 additions & 0 deletions pkg/statistics/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Go library target for the statistics package.
go_library(
name = "statistics",
# Non-test Go sources compiled into the library.
srcs = [
"analyze.go",
"analyze_jobs.go",
"builder.go",
"builder_ext_stats.go",
"cmsketch.go",
"cmsketch_util.go",
"column.go",
"debugtrace.go",
"estimate.go",
"fmsketch.go",
"histogram.go",
"index.go",
"row_sampler.go",
"sample.go",
"scalar.go",
"table.go",
],
# Go import path under which other packages import this library.
importpath = "github.com/pingcap/tidb/pkg/statistics",
# Any package in the workspace may depend on this target.
visibility = ["//visibility:public"],
# Direct dependencies: in-repo packages first ("//pkg/..."), then external
# repositories ("@...").
deps = [
"//pkg/expression",
"//pkg/kv",
"//pkg/parser/ast",
"//pkg/parser/charset",
"//pkg/parser/model",
"//pkg/parser/mysql",
"//pkg/parser/terror",
"//pkg/planner/util/debugtrace",
"//pkg/sessionctx",
"//pkg/sessionctx/stmtctx",
"//pkg/sessionctx/variable",
"//pkg/statistics/handle/logutil",
"//pkg/tablecodec",
"//pkg/types",
"//pkg/util/chunk",
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/dbterror",
"//pkg/util/fastrand",
"//pkg/util/hack",
"//pkg/util/intest",
"//pkg/util/logutil",
"//pkg/util/memory",
"//pkg/util/ranger",
"//pkg/util/sqlexec",
"@com_github_dolthub_swiss//:swiss",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_twmb_murmur3//:murmur3",
"@org_golang_x_exp//maps",
"@org_uber_go_atomic//:atomic",
"@org_uber_go_zap//:zap",
],
)

# Go test target for the statistics package.
go_test(
name = "statistics_test",
# Uses Bazel's "short" test timeout category.
timeout = "short",
# Test and benchmark sources.
srcs = [
"bench_daily_test.go",
"builder_test.go",
"cmsketch_test.go",
"fmsketch_test.go",
"histogram_bench_test.go",
"histogram_test.go",
"integration_test.go",
"main_test.go",
"sample_test.go",
"scalar_test.go",
"statistics_test.go",
],
# Everything under testdata/ is made available to the tests at runtime.
data = glob(["testdata/**"]),
# Compile the tests together with the ":statistics" library sources so they
# form a single Go package.
embed = [":statistics"],
# Marked flaky: Bazel retries a failing run before reporting failure.
flaky = True,
# The test cases are split across 35 shards.
shard_count = 35,
# Dependencies needed only by the test sources.
deps = [
"//pkg/config",
"//pkg/parser/ast",
"//pkg/parser/model",
"//pkg/parser/mysql",
"//pkg/sessionctx",
"//pkg/sessionctx/stmtctx",
"//pkg/statistics/handle/autoanalyze",
"//pkg/testkit",
"//pkg/testkit/testdata",
"//pkg/testkit/testmain",
"//pkg/testkit/testsetup",
"//pkg/types",
"//pkg/util/benchdaily",
"//pkg/util/chunk",
"//pkg/util/codec",
"//pkg/util/collate",
"//pkg/util/memory",
"//pkg/util/mock",
"//pkg/util/ranger",
"//pkg/util/sqlexec",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_stretchr_testify//require",
"@org_uber_go_goleak//:goleak",
],
)
4 changes: 4 additions & 0 deletions statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ func BuildHistAndTopN(
if err != nil {
return nil, nil, errors.Trace(err)
}
<<<<<<< HEAD:statistics/builder.go
// For debugging invalid sample data.
var (
foundTwice bool
Expand Down Expand Up @@ -417,12 +418,15 @@ func BuildHistAndTopN(
continue
}
}
=======
>>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)):pkg/statistics/builder.go
}

for i := 0; i < len(topNList); i++ {
topNList[i].Count *= uint64(sampleFactor)
}
topn := &TopN{TopN: topNList}
topn.Scale(sampleFactor)

if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) {
// TopN includes all sample data
Expand Down
7 changes: 7 additions & 0 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,13 @@ type TopN struct {
TopN []TopNMeta
}

// Scale multiplies the count of every TopN entry by scaleFactor, in place.
//
// Each product is computed in float64 and converted back to uint64, so any
// fractional part is truncated (a count of 3 scaled by 0.5 becomes 1).
// Calling Scale on a nil *TopN is a no-op; AppendTopN likewise checks for a
// nil receiver.
func (c *TopN) Scale(scaleFactor float64) {
	if c == nil {
		return
	}
	for i := range c.TopN {
		c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor)
	}
}

// AppendTopN appends a topn into the TopN struct.
func (c *TopN) AppendTopN(data []byte, count uint64) {
if c == nil {
Expand Down
20 changes: 20 additions & 0 deletions statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,23 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) {
require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55")
require.Len(t, leftTopN, 1, "should have 1 left topN")
}

// TestTopNScale verifies that scaling a TopN keeps its total count close to
// the exactly scaled total: for every factor, the relative rounding error of
// TotalCount() after Scale must stay below 0.0001.
func TestTopNScale(t *testing.T) {
	const entries = 20
	factors := []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99}

	for _, scaleFactor := range factors {
		// Build a TopN with random counts while tracking the unscaled total.
		var total uint64
		metas := make([]TopNMeta, 0, entries)
		for len(metas) < entries {
			c := uint64(rand.Intn(100000))
			metas = append(metas, TopNMeta{Count: c})
			total += c
		}

		scaled := TopN{TopN: metas}
		scaled.Scale(scaleFactor)

		// Compare the scaled total against the exact (floating-point) product.
		want := float64(total) * scaleFactor
		got := float64(scaled.TotalCount())
		require.Less(t, math.Abs(got-want)/want, 0.0001)
	}
}

0 comments on commit 114d58c

Please sign in to comment.