From 097c073e5eb09070a9f3556d5788f715e582fd32 Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 17 Aug 2021 17:04:00 +0800 Subject: [PATCH 1/2] cherry pick #27295 to release-4.0 Signed-off-by: ti-srebot --- statistics/selectivity.go | 2 +- statistics/selectivity_test.go | 157 +++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 1 deletion(-) diff --git a/statistics/selectivity.go b/statistics/selectivity.go index cdb5d862324ca..4d257713f4f09 100644 --- a/statistics/selectivity.go +++ b/statistics/selectivity.go @@ -305,10 +305,10 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran } var res *ranger.DetachRangeResult res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths) - ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond if err != nil { return 0, nil, false, err } + ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond default: panic("should never be here") } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index c1be105d3ccc6..75de7224426f0 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -667,3 +667,160 @@ func (s *testStatsSuite) TestCollationColumnEstimate(c *C) { tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) } } +<<<<<<< HEAD +======= + +// TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption. +func (s *testStatsSuite) TestDNFCondSelectivity(c *C) { + defer cleanEnv(c, s.store, s.do) + testKit := testkit.NewTestKit(c, s.store) + + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int, b int, c int, d int)") + testKit.MustExec("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)") + testKit.MustExec("alter table t add index (b)") + testKit.MustExec("alter table t add index (d)") + testKit.MustExec(`analyze table t`) + + ctx := context.Background() + h := s.do.StatsHandle() + tb, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tblInfo := tb.Meta() + statsTbl := h.GetTableStats(tblInfo) + + var ( + input []string + output []struct { + SQL string + Selectivity float64 + } + ) + s.testData.GetTestCases(c, &input, &output) + for i, tt := range input { + sctx := testKit.Se.(sessionctx.Context) + stmts, err := session.Parse(sctx, tt) + c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt)) + c.Assert(stmts, HasLen, 1) + + ret := &plannercore.PreprocessorReturn{} + err = plannercore.Preprocess(sctx, stmts[0], plannercore.WithPreprocessorReturn(ret)) + c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt)) + p, _, err := plannercore.BuildLogicalPlan(ctx, sctx, stmts[0], ret.InfoSchema) + c.Assert(err, IsNil, Commentf("error %v, for building plan, sql %s", err, tt)) + + sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) + ds := sel.Children()[0].(*plannercore.DataSource) + + histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns) + + ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil) + c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt)) + s.testData.OnRecord(func() { + output[i].SQL = tt + output[i].Selectivity = ratio + }) + c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue, + Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio)) + } + + // Test issue 19981 + testKit.MustExec("select * from t where _tidb_rowid is null or _tidb_rowid > 7") + + // Test issue 22134 + // Information about column n will not be in stats immediately after this SQL executed. + // If we don't have a check against this, DNF condition could lead to infinite recursion in Selectivity(). + testKit.MustExec("alter table t add column n timestamp;") + testKit.MustExec("select * from t where n = '2000-01-01' or n = '2000-01-02';") + + // Test issue 27294 + testKit.MustExec("create table tt (COL1 blob DEFAULT NULL,COL2 decimal(37,4) DEFAULT NULL,COL3 timestamp NULL DEFAULT NULL,COL4 int(11) DEFAULT NULL,UNIQUE KEY U_M_COL4(COL1(10),COL2), UNIQUE KEY U_M_COL5(COL3,COL2));") + testKit.MustExec("explain select * from tt where col1 is not null or col2 not between 454623814170074.2771 and -975540642273402.9269 and col3 not between '2039-1-19 10:14:57' and '2002-3-27 14:40:23';") +} + +func (s *testStatsSuite) TestIndexEstimationCrossValidate(c *C) { + defer cleanEnv(c, s.store, s.do) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int, b int, key(a,b))") + tk.MustExec("insert into t values(1, 1), (1, 2), (1, 3), (2, 2)") + tk.MustExec("analyze table t") + c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64", `return(100000)`), IsNil) + tk.MustQuery("explain select * from t where a = 1 and b = 2").Check(testkit.Rows( + "IndexReader_6 1.00 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a, b) range:[1 2,1 2], keep order:false")) + c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64"), IsNil) + + // Test issue 22466 + tk.MustExec("drop table if exists t2") + tk.MustExec("create table t2(a int, b int, key b(b))") + tk.MustExec("insert into t2 values(1, 1), (2, 2), (3, 3), (4, 4), (5,5)") + // This line of select will mark column b stats as needed, and an invalid(empty) stats for column b + // will be loaded at the next analyze line, this will trigger the bug. + tk.MustQuery("select * from t2 where b=2") + tk.MustExec("analyze table t2 index b") + tk.MustQuery("explain select * from t2 where b=2").Check(testkit.Rows( + "TableReader_7 1.00 root data:Selection_6", + "└─Selection_6 1.00 cop[tikv] eq(test.t2.b, 2)", + " └─TableFullScan_5 5.00 cop[tikv] table:t2 keep order:false")) +} + +func (s *testStatsSuite) TestRangeStepOverflow(c *C) { + defer cleanEnv(c, s.store, s.do) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t (col datetime)") + tk.MustExec("insert into t values('3580-05-26 07:16:48'),('4055-03-06 22:27:16'),('4862-01-26 07:16:54')") + h := s.do.StatsHandle() + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + tk.MustExec("analyze table t") + // Trigger the loading of column stats. + tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) + c.Assert(h.LoadNeededHistograms(), IsNil) + // Must execute successfully after loading the column stats. + tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) +} + +func (s *testStatsSuite) TestSmallRangeEstimation(c *C) { + defer cleanEnv(c, s.store, s.do) + testKit := testkit.NewTestKit(c, s.store) + testKit.MustExec("use test") + testKit.MustExec("drop table if exists t") + testKit.MustExec("create table t(a int)") + for i := 0; i < 400; i++ { + testKit.MustExec(fmt.Sprintf("insert into t values (%v), (%v), (%v)", i, i, i)) // [0, 400) + } + testKit.MustExec("analyze table t with 0 topn") + + h := s.do.StatsHandle() + table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + statsTbl := h.GetTableStats(table.Meta()) + sc := &stmtctx.StatementContext{} + col := statsTbl.Columns[table.Meta().Columns[0].ID] + + var input []struct { + Start int64 + End int64 + } + var output []struct { + Start int64 + End int64 + Count float64 + } + s.testData.GetTestCases(c, &input, &output) + for i, ran := range input { + count, err := col.GetColumnRowCount(sc, getRange(ran.Start, ran.End), statsTbl.Count, false) + c.Assert(err, IsNil) + s.testData.OnRecord(func() { + output[i].Start = ran.Start + output[i].End = ran.End + output[i].Count = count + }) + c.Assert(math.Abs(count-output[i].Count) < eps, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count)) + } +} +>>>>>>> a0de91fa0... statistics: fix a error check to prevent nil dereference (#27295) From 0b1fc46df92a60b0e5c8570332fc8ae66596fc71 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Tue, 17 Aug 2021 17:52:20 +0800 Subject: [PATCH 2/2] resolve conflicts --- statistics/selectivity_test.go | 151 +-------------------------------- 1 file changed, 2 insertions(+), 149 deletions(-) diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 75de7224426f0..7218711b4881c 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -667,160 +667,13 @@ func (s *testStatsSuite) TestCollationColumnEstimate(c *C) { tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...)) } } -<<<<<<< HEAD -======= -// TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption. -func (s *testStatsSuite) TestDNFCondSelectivity(c *C) { +func (s *testStatsSuite) TestIssue27294(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store) testKit.MustExec("use test") - testKit.MustExec("drop table if exists t") - testKit.MustExec("create table t(a int, b int, c int, d int)") - testKit.MustExec("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)") - testKit.MustExec("alter table t add index (b)") - testKit.MustExec("alter table t add index (d)") - testKit.MustExec(`analyze table t`) - - ctx := context.Background() - h := s.do.StatsHandle() - tb, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - tblInfo := tb.Meta() - statsTbl := h.GetTableStats(tblInfo) - - var ( - input []string - output []struct { - SQL string - Selectivity float64 - } - ) - s.testData.GetTestCases(c, &input, &output) - for i, tt := range input { - sctx := testKit.Se.(sessionctx.Context) - stmts, err := session.Parse(sctx, tt) - c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt)) - c.Assert(stmts, HasLen, 1) - - ret := &plannercore.PreprocessorReturn{} - err = plannercore.Preprocess(sctx, stmts[0], plannercore.WithPreprocessorReturn(ret)) - c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt)) - p, _, err := plannercore.BuildLogicalPlan(ctx, sctx, stmts[0], ret.InfoSchema) - c.Assert(err, IsNil, Commentf("error %v, for building plan, sql %s", err, tt)) - - sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection) - ds := sel.Children()[0].(*plannercore.DataSource) - - histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns) - - ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil) - c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt)) - s.testData.OnRecord(func() { - output[i].SQL = tt - output[i].Selectivity = ratio - }) - c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue, - Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio)) - } - - // Test issue 19981 - testKit.MustExec("select * from t where _tidb_rowid is null or _tidb_rowid > 7") - - // Test issue 22134 - // Information about column n will not be in stats immediately after this SQL executed. - // If we don't have a check against this, DNF condition could lead to infinite recursion in Selectivity(). - testKit.MustExec("alter table t add column n timestamp;") - testKit.MustExec("select * from t where n = '2000-01-01' or n = '2000-01-02';") - - // Test issue 27294 + testKit.MustExec("drop table if exists tt") testKit.MustExec("create table tt (COL1 blob DEFAULT NULL,COL2 decimal(37,4) DEFAULT NULL,COL3 timestamp NULL DEFAULT NULL,COL4 int(11) DEFAULT NULL,UNIQUE KEY U_M_COL4(COL1(10),COL2), UNIQUE KEY U_M_COL5(COL3,COL2));") testKit.MustExec("explain select * from tt where col1 is not null or col2 not between 454623814170074.2771 and -975540642273402.9269 and col3 not between '2039-1-19 10:14:57' and '2002-3-27 14:40:23';") } - -func (s *testStatsSuite) TestIndexEstimationCrossValidate(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a int, b int, key(a,b))") - tk.MustExec("insert into t values(1, 1), (1, 2), (1, 3), (2, 2)") - tk.MustExec("analyze table t") - c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64", `return(100000)`), IsNil) - tk.MustQuery("explain select * from t where a = 1 and b = 2").Check(testkit.Rows( - "IndexReader_6 1.00 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a, b) range:[1 2,1 2], keep order:false")) - c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64"), IsNil) - - // Test issue 22466 - tk.MustExec("drop table if exists t2") - tk.MustExec("create table t2(a int, b int, key b(b))") - tk.MustExec("insert into t2 values(1, 1), (2, 2), (3, 3), (4, 4), (5,5)") - // This line of select will mark column b stats as needed, and an invalid(empty) stats for column b - // will be loaded at the next analyze line, this will trigger the bug. - tk.MustQuery("select * from t2 where b=2") - tk.MustExec("analyze table t2 index b") - tk.MustQuery("explain select * from t2 where b=2").Check(testkit.Rows( - "TableReader_7 1.00 root data:Selection_6", - "└─Selection_6 1.00 cop[tikv] eq(test.t2.b, 2)", - " └─TableFullScan_5 5.00 cop[tikv] table:t2 keep order:false")) -} - -func (s *testStatsSuite) TestRangeStepOverflow(c *C) { - defer cleanEnv(c, s.store, s.do) - tk := testkit.NewTestKit(c, s.store) - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("create table t (col datetime)") - tk.MustExec("insert into t values('3580-05-26 07:16:48'),('4055-03-06 22:27:16'),('4862-01-26 07:16:54')") - h := s.do.StatsHandle() - c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) - tk.MustExec("analyze table t") - // Trigger the loading of column stats. - tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) - c.Assert(h.LoadNeededHistograms(), IsNil) - // Must execute successfully after loading the column stats. - tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows()) -} - -func (s *testStatsSuite) TestSmallRangeEstimation(c *C) { - defer cleanEnv(c, s.store, s.do) - testKit := testkit.NewTestKit(c, s.store) - testKit.MustExec("use test") - testKit.MustExec("drop table if exists t") - testKit.MustExec("create table t(a int)") - for i := 0; i < 400; i++ { - testKit.MustExec(fmt.Sprintf("insert into t values (%v), (%v), (%v)", i, i, i)) // [0, 400) - } - testKit.MustExec("analyze table t with 0 topn") - - h := s.do.StatsHandle() - table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) - c.Assert(err, IsNil) - statsTbl := h.GetTableStats(table.Meta()) - sc := &stmtctx.StatementContext{} - col := statsTbl.Columns[table.Meta().Columns[0].ID] - - var input []struct { - Start int64 - End int64 - } - var output []struct { - Start int64 - End int64 - Count float64 - } - s.testData.GetTestCases(c, &input, &output) - for i, ran := range input { - count, err := col.GetColumnRowCount(sc, getRange(ran.Start, ran.End), statsTbl.Count, false) - c.Assert(err, IsNil) - s.testData.OnRecord(func() { - output[i].Start = ran.Start - output[i].End = ran.End - output[i].Count = count - }) - c.Assert(math.Abs(count-output[i].Count) < eps, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count)) - } -} ->>>>>>> a0de91fa0... statistics: fix a error check to prevent nil dereference (#27295)