Skip to content

Commit

Permalink
planner: a better way to round scale factor when collecting TopN stats
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Feb 20, 2024
1 parent a91305e commit 14a7263
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pkg/statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ go_test(
data = glob(["testdata/**"]),
embed = [":statistics"],
flaky = True,
shard_count = 33,
shard_count = 34,
deps = [
"//pkg/config",
"//pkg/parser/ast",
Expand Down
4 changes: 1 addition & 3 deletions pkg/statistics/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,8 @@ func BuildHistAndTopN(
}
}

for i := 0; i < len(topNList); i++ {
topNList[i].Count *= uint64(sampleFactor)
}
topn := &TopN{TopN: topNList}
topn.Scale(sampleFactor)

if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) {
// TopN includes all sample data
Expand Down
7 changes: 7 additions & 0 deletions pkg/statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,13 @@ type TopN struct {
TopN []TopNMeta
}

// Scale multiplies the Count of every entry in the TopN by scaleFactor, in place.
//
// Each count is converted to float64, multiplied by scaleFactor, and converted
// back to uint64, so the fractional part of the product is truncated toward
// zero (for example, a count of 3 scaled by 2.5 becomes 7).
//
// scaleFactor is expected to be finite and non-negative: converting a
// negative, NaN, or out-of-range product to uint64 yields an
// implementation-dependent value in Go.
//
// Calling Scale on a nil *TopN is a no-op; AppendTopN likewise checks for a
// nil receiver.
func (c *TopN) Scale(scaleFactor float64) {
	if c == nil {
		return
	}
	for i := range c.TopN {
		c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor)
	}
}

// AppendTopN appends a topn into the TopN struct.
func (c *TopN) AppendTopN(data []byte, count uint64) {
if c == nil {
Expand Down
20 changes: 20 additions & 0 deletions pkg/statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,23 @@ func TestSortTopnMeta(t *testing.T) {
SortTopnMeta(data)
require.Equal(t, uint64(2), data[0].Count)
}

// TestTopNScale builds a TopN of 20 random counts for each of several
// fractional scale factors, scales it, and requires that the resulting total
// count differs from the exactly scaled total by a relative error below 0.0001.
func TestTopNScale(t *testing.T) {
	const entryCount = 20
	factors := []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99}
	for _, factor := range factors {
		metas := make([]TopNMeta, 0, entryCount)
		var total uint64
		for len(metas) < entryCount {
			n := uint64(rand.Intn(100000))
			total += n
			metas = append(metas, TopNMeta{Count: n})
		}

		scaled := TopN{TopN: metas}
		scaled.Scale(factor)

		// Compare against the total scaled in floating point, without per-entry truncation.
		expected := float64(total) * factor
		relErr := math.Abs(float64(scaled.TotalCount())-expected) / expected
		require.Less(t, relErr, 0.0001)
	}
}

0 comments on commit 14a7263

Please sign in to comment.