From 0f02a40645ed3dc52fe36dde460896193ab31fd6 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 10 Jan 2023 19:21:13 +0800 Subject: [PATCH 1/4] fix --- statistics/index.go | 12 +++-- statistics/selectivity_test.go | 85 +++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/statistics/index.go b/statistics/index.go index 71d2aa839bd61..df383cb5832e2 100644 --- a/statistics/index.go +++ b/statistics/index.go @@ -222,6 +222,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang totalCount := float64(0) isSingleCol := len(idx.Info.Columns) == 1 for _, indexRange := range indexRanges { + var count float64 lb, err := codec.EncodeKey(sc, nil, indexRange.LowVal...) if err != nil { return 0, err @@ -242,7 +243,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang totalCount++ continue } - count := idx.equalRowCount(lb, realtimeRowCount) + count = idx.equalRowCount(lb, realtimeRowCount) // If the current table row count has changed, we should scale the row count accordingly. count *= idx.GetIncreaseFactor(realtimeRowCount) totalCount += count @@ -262,7 +263,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang r := types.NewBytesDatum(rb) lowIsNull := bytes.Equal(lb, nullKeyBytes) if isSingleCol && lowIsNull { - totalCount += float64(idx.Histogram.NullCount) + count += float64(idx.Histogram.NullCount) } expBackoffSuccess := false // Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything. @@ -301,11 +302,11 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang } } if !expBackoffSuccess { - totalCount += idx.BetweenRowCount(l, r) + count += idx.BetweenRowCount(l, r) } // If the current table row count has changed, we should scale the row count accordingly. - totalCount *= idx.GetIncreaseFactor(realtimeRowCount) + count *= idx.GetIncreaseFactor(realtimeRowCount) // handling the out-of-range part if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) { @@ -313,8 +314,9 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang if increaseCount < 0 { increaseCount = 0 } - totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, increaseCount) + count += idx.Histogram.outOfRangeRowCount(&l, &r, increaseCount) } + totalCount += count } totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount)) return totalCount, nil diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index f0dad37f7cac8..bb7795e5c8d1a 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -17,6 +17,7 @@ package statistics_test import ( "context" "fmt" + "golang.org/x/exp/slices" "math" "os" "regexp" @@ -853,7 +854,7 @@ func prepareSelectivity(testKit *testkit.TestKit, dom *domain.Domain) (*statisti return statsTbl, nil } -func getRange(start, end int64) []*ranger.Range { +func getRange(start, end int64) ranger.Ranges { ran := &ranger.Range{ LowVal: []types.Datum{types.NewIntDatum(start)}, HighVal: []types.Datum{types.NewIntDatum(end)}, @@ -862,6 +863,21 @@ func getRange(start, end int64) []*ranger.Range { return []*ranger.Range{ran} } +func getRanges(start, end []int64) (res ranger.Ranges) { + if len(start) != len(end) { + return nil + } + for i := range start { + ran := &ranger.Range{ + LowVal: []types.Datum{types.NewIntDatum(start[i])}, + HighVal: []types.Datum{types.NewIntDatum(end[i])}, + Collators: collate.GetBinaryCollatorSlice(1), + } + res = append(res, ran) + } + return +} + func TestSelectivityGreedyAlgo(t *testing.T) { nodes := make([]*statistics.StatsNode, 3) nodes[0] = statistics.MockStatsNode(1, 3, 2) @@ -991,3 +1007,70 @@ type outputType struct { SQL string Result []string } + +func generateMapsForMockStatsTbl(statsTbl *statistics.Table) { + idx2Columns := make(map[int64][]int64) + colID2IdxIDs := make(map[int64][]int64) + for _, idxHist := range statsTbl.Indices { + ids := make([]int64, 0, len(idxHist.Info.Columns)) + for _, idxCol := range idxHist.Info.Columns { + ids = append(ids, int64(idxCol.Offset)) + } + colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID) + idx2Columns[idxHist.ID] = ids + } + for _, idxIDs := range colID2IdxIDs { + slices.Sort(idxIDs) + } + statsTbl.Idx2ColumnIDs = idx2Columns + statsTbl.ColID2IdxIDs = colID2IdxIDs +} + +func TestIssue39593(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + testKit := testkit.NewTestKit(t, store) + + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int, b int, index idx(a, b))") + is := dom.InfoSchema() + tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + require.NoError(t, err) + tblInfo := tb.Meta() + + // mock the statistics.Table + statsTbl := mockStatsTable(tblInfo, 540) + colValues, err := generateIntDatum(1, 54) + require.NoError(t, err) + for i := 1; i <= 2; i++ { + statsTbl.Columns[int64(i)] = &statistics.Column{ + Histogram: *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)), + Info: tblInfo.Columns[i-1], + StatsLoadedStatus: statistics.NewStatsFullLoadStatus(), + StatsVer: 2, + } + } + idxValues, err := generateIntDatum(2, 3) + require.NoError(t, err) + tp := types.NewFieldType(mysql.TypeBlob) + statsTbl.Indices[1] = &statistics.Index{ + Histogram: *mockStatsHistogram(1, idxValues, 60, tp), + Info: tblInfo.Indices[0], + StatsVer: 2, + } + generateMapsForMockStatsTbl(statsTbl) + + + sctx := testKit.Session() + idxID := tblInfo.Indices[0].ID + vals := []int64{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} + count, err := statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals,vals)) + require.NoError(t, err) + // estimated row count without any changes + require.Equal(t, float64(360), count) + statsTbl.Count *= 10 + count, err = statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals,vals)) + require.NoError(t, err) + // estimated row count after mock modify on the table + require.Equal(t, float64(3870), count) +} From 176734dff1cd772072b8a83d034642a790370c5d Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 10 Jan 2023 19:39:37 +0800 Subject: [PATCH 2/4] fmt --- statistics/selectivity_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 376dc5a1d0eee..338e3b8e4a8d3 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -1144,16 +1144,15 @@ func TestIssue39593(t *testing.T) { } generateMapsForMockStatsTbl(statsTbl) - sctx := testKit.Session() idxID := tblInfo.Indices[0].ID - vals := []int64{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20} - count, err := statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals,vals)) + vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20} + count, err := statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals)) require.NoError(t, err) // estimated row count without any changes require.Equal(t, float64(360), count) statsTbl.Count *= 10 - count, err = statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals,vals)) + count, err = statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals)) require.NoError(t, err) // estimated row count after mock modify on the table require.Equal(t, float64(3870), count) From 6330b2e1b26e095e6a9c913adb642ba2bda24be2 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 10 Jan 2023 19:40:32 +0800 Subject: [PATCH 3/4] bazel update --- statistics/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/statistics/BUILD.bazel b/statistics/BUILD.bazel index 6a1b3d5a54921..e6992020197c3 100644 --- a/statistics/BUILD.bazel +++ b/statistics/BUILD.bazel @@ -112,6 +112,7 @@ go_test( "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_log//:log", "@com_github_stretchr_testify//require", + "@org_golang_x_exp//slices", "@org_uber_go_goleak//:goleak", "@org_uber_go_zap//:zap", ], From 0d0e580ae334a1cb2c1d18632c404e0e5132d208 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 10 Jan 2023 20:04:45 +0800 Subject: [PATCH 4/4] update test result --- statistics/selectivity_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 338e3b8e4a8d3..05a7413fa3d09 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -1155,5 +1155,5 @@ func TestIssue39593(t *testing.T) { count, err = statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals)) require.NoError(t, err) // estimated row count after mock modify on the table - require.Equal(t, float64(3870), count) + require.Equal(t, float64(3600), count) }