Skip to content

Commit 4021862

Browse files
alivxxxzz-jason
authored and committed
stats: fix histogram boundaries overflow error (#7883) (#7944)
1 parent 52d5ee2 commit 4021862

File tree

6 files changed

+159
-29
lines changed

6 files changed

+159
-29
lines changed

statistics/feedback.go

+84-20
Original file line numberDiff line numberDiff line change
@@ -243,20 +243,79 @@ type BucketFeedback struct {
243243
upper *types.Datum // The upper bound of the new bucket.
244244
}
245245

246+
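// outOfRange checks whether `val` lies outside [`min`, `max`]: it returns a negative value if `val` < `min`, a positive value if `val` > `max`, and 0 otherwise.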
247+
func outOfRange(sc *stmtctx.StatementContext, min, max, val *types.Datum) (int, error) {
248+
result, err := val.CompareDatum(sc, min)
249+
if err != nil {
250+
return 0, err
251+
}
252+
if result < 0 {
253+
return result, nil
254+
}
255+
result, err = val.CompareDatum(sc, max)
256+
if err != nil {
257+
return 0, err
258+
}
259+
if result > 0 {
260+
return result, nil
261+
}
262+
return 0, nil
263+
}
264+
265+
// adjustFeedbackBoundaries adjusts the feedback boundaries according to the `min` and `max`.
266+
// If the feedback has no intersection with [`min`, `max`], it returns true so that the caller can skip this feedback.
267+
func (f *feedback) adjustFeedbackBoundaries(sc *stmtctx.StatementContext, min, max *types.Datum) (bool, error) {
268+
result, err := outOfRange(sc, min, max, f.lower)
269+
if err != nil {
270+
return false, err
271+
}
272+
if result > 0 {
273+
return true, nil
274+
}
275+
if result < 0 {
276+
f.lower = min
277+
}
278+
result, err = outOfRange(sc, min, max, f.upper)
279+
if err != nil {
280+
return false, err
281+
}
282+
if result < 0 {
283+
return true, nil
284+
}
285+
if result > 0 {
286+
f.upper = max
287+
}
288+
return false, nil
289+
}
290+
246291
// buildBucketFeedback builds the feedback for each bucket from the histogram feedback.
247292
func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*BucketFeedback, int) {
248293
bktID2FB := make(map[int]*BucketFeedback)
294+
if len(feedback.feedback) == 0 {
295+
return bktID2FB, 0
296+
}
249297
total := 0
250-
for _, ran := range feedback.feedback {
251-
idx, _ := h.Bounds.LowerBound(0, ran.lower)
298+
sc := &stmtctx.StatementContext{TimeZone: time.UTC}
299+
kind := feedback.feedback[0].lower.Kind()
300+
min, max := getMinValue(kind, h.tp), getMaxValue(kind, h.tp)
301+
for _, fb := range feedback.feedback {
302+
skip, err := fb.adjustFeedbackBoundaries(sc, &min, &max)
303+
if err != nil {
304+
log.Debugf("adjust feedback boundaries failed, err: %v", errors.ErrorStack(err))
305+
continue
306+
}
307+
if skip {
308+
continue
309+
}
310+
idx, _ := h.Bounds.LowerBound(0, fb.lower)
252311
bktIdx := 0
253312
// The last bucket also stores the feedback that falls outside the upper bound.
254313
if idx >= h.Bounds.NumRows()-2 {
255314
bktIdx = h.Len() - 1
256315
} else {
257316
bktIdx = idx / 2
258317
// Make sure that this feedback lies within the bucket.
259-
if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, ran.upper) < 0 {
318+
if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, fb.upper) < 0 {
260319
continue
261320
}
262321
}
@@ -266,23 +325,23 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket
266325
bkt = &BucketFeedback{lower: h.GetLower(bktIdx), upper: h.GetUpper(bktIdx)}
267326
bktID2FB[bktIdx] = bkt
268327
}
269-
bkt.feedback = append(bkt.feedback, ran)
328+
bkt.feedback = append(bkt.feedback, fb)
270329
// Update the bound if necessary.
271-
res, err := bkt.lower.CompareDatum(nil, ran.lower)
330+
res, err := bkt.lower.CompareDatum(nil, fb.lower)
272331
if err != nil {
273-
log.Debugf("compare datum %v with %v failed, err: %v", bkt.lower, ran.lower, errors.ErrorStack(err))
332+
log.Debugf("compare datum %v with %v failed, err: %v", bkt.lower, fb.lower, errors.ErrorStack(err))
274333
continue
275334
}
276335
if res > 0 {
277-
bkt.lower = ran.lower
336+
bkt.lower = fb.lower
278337
}
279-
res, err = bkt.upper.CompareDatum(nil, ran.upper)
338+
res, err = bkt.upper.CompareDatum(nil, fb.upper)
280339
if err != nil {
281-
log.Debugf("compare datum %v with %v failed, err: %v", bkt.upper, ran.upper, errors.ErrorStack(err))
340+
log.Debugf("compare datum %v with %v failed, err: %v", bkt.upper, fb.upper, errors.ErrorStack(err))
282341
continue
283342
}
284343
if res < 0 {
285-
bkt.upper = ran.upper
344+
bkt.upper = fb.upper
286345
}
287346
}
288347
return bktID2FB, total
@@ -528,7 +587,12 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6
528587
func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram {
529588
buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
530589
buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount))
531-
return buildNewHistogram(h, buckets)
590+
hist := buildNewHistogram(h, buckets)
591+
// Update the NDV of primary key column.
592+
if feedback.tp == pkType {
593+
hist.NDV = int64(hist.totalRowCount())
594+
}
595+
return hist
532596
}
533597

534598
// UpdateCMSketch updates the CMSketch by feedback.
@@ -1077,13 +1141,13 @@ func supportColumnType(k byte) bool {
10771141
func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) {
10781142
switch k {
10791143
case types.KindInt64:
1080-
max.SetInt64(math.MaxInt64)
1144+
max.SetInt64(types.SignedUpperBound[ft.Tp])
10811145
case types.KindUint64:
1082-
max.SetUint64(math.MaxUint64)
1146+
max.SetUint64(types.UnsignedUpperBound[ft.Tp])
10831147
case types.KindFloat32:
1084-
max.SetFloat32(math.MaxFloat32)
1148+
max.SetFloat32(float32(types.GetMaxFloat(ft.Flen, ft.Decimal)))
10851149
case types.KindFloat64:
1086-
max.SetFloat64(math.MaxFloat64)
1150+
max.SetFloat64(types.GetMaxFloat(ft.Flen, ft.Decimal))
10871151
case types.KindString, types.KindBytes:
10881152
val := types.MaxValueDatum()
10891153
bytes, err := codec.EncodeKey(nil, nil, val)
@@ -1093,7 +1157,7 @@ func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) {
10931157
}
10941158
max.SetBytes(bytes)
10951159
case types.KindMysqlDecimal:
1096-
max.SetMysqlDecimal(types.NewMaxOrMinDec(false, mysql.MaxDecimalWidth, 0))
1160+
max.SetMysqlDecimal(types.NewMaxOrMinDec(false, ft.Flen, ft.Decimal))
10971161
case types.KindMysqlDuration:
10981162
max.SetMysqlDuration(types.Duration{Duration: math.MaxInt64})
10991163
case types.KindMysqlTime:
@@ -1109,13 +1173,13 @@ func getMaxValue(k byte, ft *types.FieldType) (max types.Datum) {
11091173
func getMinValue(k byte, ft *types.FieldType) (min types.Datum) {
11101174
switch k {
11111175
case types.KindInt64:
1112-
min.SetInt64(math.MinInt64)
1176+
min.SetInt64(types.SignedLowerBound[ft.Tp])
11131177
case types.KindUint64:
11141178
min.SetUint64(0)
11151179
case types.KindFloat32:
1116-
min.SetFloat32(-math.MaxFloat32)
1180+
min.SetFloat32(float32(-types.GetMaxFloat(ft.Flen, ft.Decimal)))
11171181
case types.KindFloat64:
1118-
min.SetFloat64(-math.MaxFloat64)
1182+
min.SetFloat64(-types.GetMaxFloat(ft.Flen, ft.Decimal))
11191183
case types.KindString, types.KindBytes:
11201184
val := types.MinNotNullDatum()
11211185
bytes, err := codec.EncodeKey(nil, nil, val)
@@ -1125,7 +1189,7 @@ func getMinValue(k byte, ft *types.FieldType) (min types.Datum) {
11251189
}
11261190
min.SetBytes(bytes)
11271191
case types.KindMysqlDecimal:
1128-
min.SetMysqlDecimal(types.NewMaxOrMinDec(true, mysql.MaxDecimalWidth, 0))
1192+
min.SetMysqlDecimal(types.NewMaxOrMinDec(true, ft.Flen, ft.Decimal))
11291193
case types.KindMysqlDuration:
11301194
min.SetMysqlDuration(types.Duration{Duration: math.MinInt64})
11311195
case types.KindMysqlTime:

statistics/feedback_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
7070
defaultBucketCount = 7
7171
defer func() { defaultBucketCount = originBucketCount }()
7272
c.Assert(UpdateHistogram(q.Hist(), q).ToString(0), Equals,
73-
"column:0 ndv:0 totColSize:0\n"+
73+
"column:0 ndv:10057 totColSize:0\n"+
7474
"num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
7575
"num: 8 lower_bound: 2 upper_bound: 7 repeats: 0\n"+
7676
"num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+

statistics/update.go

-4
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,6 @@ func (h *Handle) handleSingleHistogramUpdate(is infoschema.InfoSchema, rows []ch
558558
log.Debugf("decode feedback failed, err: %v", errors.ErrorStack(err))
559559
}
560560
}
561-
// Update the NDV of primary key column.
562-
if table.Meta().PKIsHandle && q.tp == pkType {
563-
hist.NDV = int64(hist.totalRowCount())
564-
}
565561
err = h.dumpStatsUpdateToKV(physicalTableID, isIndex, q, hist, cms)
566562
return errors.Trace(err)
567563
}

statistics/update_test.go

+70-1
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ func (s *testStatsUpdateSuite) TestQueryFeedback(c *C) {
594594
{
595595
// test primary key feedback
596596
sql: "select * from t where t.a <= 5",
597-
hist: "column:1 ndv:3 totColSize:0\n" +
597+
hist: "column:1 ndv:4 totColSize:0\n" +
598598
"num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" +
599599
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n" +
600600
"num: 2 lower_bound: 3 upper_bound: 5 repeats: 0",
@@ -1118,3 +1118,72 @@ func (s *testStatsUpdateSuite) TestIndexQueryFeedback(c *C) {
11181118
c.Assert(tbl.Indices[t.idxID].CMSketch.QueryBytes(val), Equals, t.eqCount)
11191119
}
11201120
}
1121+
1122+
func (s *testStatsUpdateSuite) TestFeedbackRanges(c *C) {
1123+
defer cleanEnv(c, s.store, s.do)
1124+
testKit := testkit.NewTestKit(c, s.store)
1125+
h := s.do.StatsHandle()
1126+
oriProbability := statistics.FeedbackProbability
1127+
oriNumber := statistics.MaxNumberOfRanges
1128+
defer func() {
1129+
statistics.FeedbackProbability = oriProbability
1130+
statistics.MaxNumberOfRanges = oriNumber
1131+
}()
1132+
statistics.FeedbackProbability = 1
1133+
1134+
testKit.MustExec("use test")
1135+
testKit.MustExec("create table t (a tinyint, b tinyint, primary key(a), index idx(a, b))")
1136+
for i := 0; i < 20; i++ {
1137+
testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
1138+
}
1139+
h.HandleDDLEvent(<-h.DDLEventCh())
1140+
c.Assert(h.DumpStatsDeltaToKV(statistics.DumpAll), IsNil)
1141+
testKit.MustExec("analyze table t with 3 buckets")
1142+
for i := 30; i < 40; i++ {
1143+
testKit.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
1144+
}
1145+
c.Assert(h.DumpStatsDeltaToKV(statistics.DumpAll), IsNil)
1146+
tests := []struct {
1147+
sql string
1148+
hist string
1149+
colID int64
1150+
}{
1151+
{
1152+
sql: "select * from t where a <= 50 or (a > 130 and a < 140)",
1153+
hist: "column:1 ndv:30 totColSize:0\n" +
1154+
"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
1155+
"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
1156+
"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
1157+
colID: 1,
1158+
},
1159+
{
1160+
sql: "select * from t where a >= 10",
1161+
hist: "column:1 ndv:30 totColSize:0\n" +
1162+
"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
1163+
"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
1164+
"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0",
1165+
colID: 1,
1166+
},
1167+
{
1168+
sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))",
1169+
hist: "column:2 ndv:20 totColSize:20\n" +
1170+
"num: 7 lower_bound: -128 upper_bound: 6 repeats: 0\n" +
1171+
"num: 7 lower_bound: 7 upper_bound: 13 repeats: 1\n" +
1172+
"num: 6 lower_bound: 14 upper_bound: 19 repeats: 1",
1173+
colID: 2,
1174+
},
1175+
}
1176+
is := s.do.InfoSchema()
1177+
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
1178+
for i, t := range tests {
1179+
testKit.MustQuery(t.sql)
1180+
c.Assert(h.DumpStatsDeltaToKV(statistics.DumpAll), IsNil)
1181+
c.Assert(h.DumpStatsFeedbackToKV(), IsNil)
1182+
c.Assert(h.HandleUpdateStats(s.do.InfoSchema()), IsNil)
1183+
c.Assert(err, IsNil)
1184+
h.Update(is)
1185+
tblInfo := table.Meta()
1186+
tbl := h.GetTableStats(tblInfo)
1187+
c.Assert(tbl.Columns[t.colID].ToString(0), Equals, tests[i].hist)
1188+
}
1189+
}

types/etc_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func (s *testTypeEtcSuite) TestMaxFloat(c *C) {
122122
}
123123

124124
for _, t := range tbl {
125-
f := getMaxFloat(t.Flen, t.Decimal)
125+
f := GetMaxFloat(t.Flen, t.Decimal)
126126
c.Assert(f, Equals, t.Expect)
127127
}
128128
}

types/helper.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ func Truncate(f float64, dec int) float64 {
5959
return math.Trunc(tmp) / shift
6060
}
6161

62-
func getMaxFloat(flen int, decimal int) float64 {
62+
// GetMaxFloat gets the max float for given flen and decimal.
63+
func GetMaxFloat(flen int, decimal int) float64 {
6364
intPartLen := flen - decimal
6465
f := math.Pow10(intPartLen)
6566
f -= math.Pow10(-decimal)
@@ -74,7 +75,7 @@ func TruncateFloat(f float64, flen int, decimal int) (float64, error) {
7475
return 0, ErrOverflow.GenWithStackByArgs("DOUBLE", "")
7576
}
7677

77-
maxF := getMaxFloat(flen, decimal)
78+
maxF := GetMaxFloat(flen, decimal)
7879

7980
if !math.IsInf(f, 0) {
8081
f = Round(f, decimal)

0 commit comments

Comments
 (0)