From 097c073e5eb09070a9f3556d5788f715e582fd32 Mon Sep 17 00:00:00 2001
From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com>
Date: Tue, 17 Aug 2021 17:04:00 +0800
Subject: [PATCH 1/2] cherry pick #27295 to release-4.0

Signed-off-by: ti-srebot <ti-srebot@pingcap.com>
---
 statistics/selectivity.go      |   2 +-
 statistics/selectivity_test.go | 157 +++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+), 1 deletion(-)

diff --git a/statistics/selectivity.go b/statistics/selectivity.go
index cdb5d862324ca..4d257713f4f09 100644
--- a/statistics/selectivity.go
+++ b/statistics/selectivity.go
@@ -305,10 +305,10 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran
 		}
 		var res *ranger.DetachRangeResult
 		res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths)
-		ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond
 		if err != nil {
 			return 0, nil, false, err
 		}
+		ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond
 	default:
 		panic("should never be here")
 	}
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index c1be105d3ccc6..75de7224426f0 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -667,3 +667,160 @@ func (s *testStatsSuite) TestCollationColumnEstimate(c *C) {
 		tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
 	}
 }
+<<<<<<< HEAD
+=======
+
+// TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption.
+func (s *testStatsSuite) TestDNFCondSelectivity(c *C) {
+	defer cleanEnv(c, s.store, s.do)
+	testKit := testkit.NewTestKit(c, s.store)
+
+	testKit.MustExec("use test")
+	testKit.MustExec("drop table if exists t")
+	testKit.MustExec("create table t(a int, b int, c int, d int)")
+	testKit.MustExec("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)")
+	testKit.MustExec("alter table t add index (b)")
+	testKit.MustExec("alter table t add index (d)")
+	testKit.MustExec(`analyze table t`)
+
+	ctx := context.Background()
+	h := s.do.StatsHandle()
+	tb, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	tblInfo := tb.Meta()
+	statsTbl := h.GetTableStats(tblInfo)
+
+	var (
+		input  []string
+		output []struct {
+			SQL         string
+			Selectivity float64
+		}
+	)
+	s.testData.GetTestCases(c, &input, &output)
+	for i, tt := range input {
+		sctx := testKit.Se.(sessionctx.Context)
+		stmts, err := session.Parse(sctx, tt)
+		c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt))
+		c.Assert(stmts, HasLen, 1)
+
+		ret := &plannercore.PreprocessorReturn{}
+		err = plannercore.Preprocess(sctx, stmts[0], plannercore.WithPreprocessorReturn(ret))
+		c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt))
+		p, _, err := plannercore.BuildLogicalPlan(ctx, sctx, stmts[0], ret.InfoSchema)
+		c.Assert(err, IsNil, Commentf("error %v, for building plan, sql %s", err, tt))
+
+		sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection)
+		ds := sel.Children()[0].(*plannercore.DataSource)
+
+		histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns)
+
+		ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil)
+		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt))
+		s.testData.OnRecord(func() {
+			output[i].SQL = tt
+			output[i].Selectivity = ratio
+		})
+		c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue,
+			Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio))
+	}
+
+	// Test issue 19981
+	testKit.MustExec("select * from t where _tidb_rowid is null or _tidb_rowid > 7")
+
+	// Test issue 22134
+	// Information about column n will not be in stats immediately after this SQL executed.
+	// If we don't have a check against this, DNF condition could lead to infinite recursion in Selectivity().
+	testKit.MustExec("alter table t add column n timestamp;")
+	testKit.MustExec("select * from t where n = '2000-01-01' or n = '2000-01-02';")
+
+	// Test issue 27294
+	testKit.MustExec("create table tt (COL1 blob DEFAULT NULL,COL2 decimal(37,4) DEFAULT NULL,COL3 timestamp NULL DEFAULT NULL,COL4 int(11) DEFAULT NULL,UNIQUE KEY U_M_COL4(COL1(10),COL2), UNIQUE KEY U_M_COL5(COL3,COL2));")
+	testKit.MustExec("explain select * from tt where col1 is not null or col2 not between 454623814170074.2771 and -975540642273402.9269 and col3 not between '2039-1-19 10:14:57' and '2002-3-27 14:40:23';")
+}
+
+func (s *testStatsSuite) TestIndexEstimationCrossValidate(c *C) {
+	defer cleanEnv(c, s.store, s.do)
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t(a int, b int, key(a,b))")
+	tk.MustExec("insert into t values(1, 1), (1, 2), (1, 3), (2, 2)")
+	tk.MustExec("analyze table t")
+	c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64", `return(100000)`), IsNil)
+	tk.MustQuery("explain select * from t where a = 1 and b = 2").Check(testkit.Rows(
+		"IndexReader_6 1.00 root  index:IndexRangeScan_5",
+		"└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a, b) range:[1 2,1 2], keep order:false"))
+	c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64"), IsNil)
+
+	// Test issue 22466
+	tk.MustExec("drop table if exists t2")
+	tk.MustExec("create table t2(a int, b int, key b(b))")
+	tk.MustExec("insert into t2 values(1, 1), (2, 2), (3, 3), (4, 4), (5,5)")
+	// This line of select will mark column b stats as needed, and an invalid(empty) stats for column b
+	// will be loaded at the next analyze line, this will trigger the bug.
+	tk.MustQuery("select * from t2 where b=2")
+	tk.MustExec("analyze table t2 index b")
+	tk.MustQuery("explain select * from t2 where b=2").Check(testkit.Rows(
+		"TableReader_7 1.00 root  data:Selection_6",
+		"└─Selection_6 1.00 cop[tikv]  eq(test.t2.b, 2)",
+		"  └─TableFullScan_5 5.00 cop[tikv] table:t2 keep order:false"))
+}
+
+func (s *testStatsSuite) TestRangeStepOverflow(c *C) {
+	defer cleanEnv(c, s.store, s.do)
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("create table t (col datetime)")
+	tk.MustExec("insert into t values('3580-05-26 07:16:48'),('4055-03-06 22:27:16'),('4862-01-26 07:16:54')")
+	h := s.do.StatsHandle()
+	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
+	tk.MustExec("analyze table t")
+	// Trigger the loading of column stats.
+	tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows())
+	c.Assert(h.LoadNeededHistograms(), IsNil)
+	// Must execute successfully after loading the column stats.
+	tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows())
+}
+
+func (s *testStatsSuite) TestSmallRangeEstimation(c *C) {
+	defer cleanEnv(c, s.store, s.do)
+	testKit := testkit.NewTestKit(c, s.store)
+	testKit.MustExec("use test")
+	testKit.MustExec("drop table if exists t")
+	testKit.MustExec("create table t(a int)")
+	for i := 0; i < 400; i++ {
+		testKit.MustExec(fmt.Sprintf("insert into t values (%v), (%v), (%v)", i, i, i)) // [0, 400)
+	}
+	testKit.MustExec("analyze table t with 0 topn")
+
+	h := s.do.StatsHandle()
+	table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	statsTbl := h.GetTableStats(table.Meta())
+	sc := &stmtctx.StatementContext{}
+	col := statsTbl.Columns[table.Meta().Columns[0].ID]
+
+	var input []struct {
+		Start int64
+		End   int64
+	}
+	var output []struct {
+		Start int64
+		End   int64
+		Count float64
+	}
+	s.testData.GetTestCases(c, &input, &output)
+	for i, ran := range input {
+		count, err := col.GetColumnRowCount(sc, getRange(ran.Start, ran.End), statsTbl.Count, false)
+		c.Assert(err, IsNil)
+		s.testData.OnRecord(func() {
+			output[i].Start = ran.Start
+			output[i].End = ran.End
+			output[i].Count = count
+		})
+		c.Assert(math.Abs(count-output[i].Count) < eps, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count))
+	}
+}
+>>>>>>> a0de91fa0... statistics: fix a error check to prevent nil dereference (#27295)

From 0b1fc46df92a60b0e5c8570332fc8ae66596fc71 Mon Sep 17 00:00:00 2001
From: time-and-fate <25057648+time-and-fate@users.noreply.github.com>
Date: Tue, 17 Aug 2021 17:52:20 +0800
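Subject: [PATCH 2/2] resolve conflicts

Remove the conflict markers left in statistics/selectivity_test.go by
the cherry-pick and drop the tests that came in on the incoming side
(TestDNFCondSelectivity, TestIndexEstimationCrossValidate,
TestRangeStepOverflow and TestSmallRangeEstimation). Only the
regression statements for issue 27294 are kept, as a standalone
TestIssue27294 that drops table tt before creating it.
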
---
 statistics/selectivity_test.go | 151 +--------------------------------
 1 file changed, 2 insertions(+), 149 deletions(-)

diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index 75de7224426f0..7218711b4881c 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -667,160 +667,13 @@ func (s *testStatsSuite) TestCollationColumnEstimate(c *C) {
 		tk.MustQuery(input[i]).Check(testkit.Rows(output[i]...))
 	}
 }
-<<<<<<< HEAD
-=======
 
-// TestDNFCondSelectivity tests selectivity calculation with DNF conditions covered by using independence assumption.
-func (s *testStatsSuite) TestDNFCondSelectivity(c *C) {
+func (s *testStatsSuite) TestIssue27294(c *C) {
 	defer cleanEnv(c, s.store, s.do)
 	testKit := testkit.NewTestKit(c, s.store)
 
 	testKit.MustExec("use test")
-	testKit.MustExec("drop table if exists t")
-	testKit.MustExec("create table t(a int, b int, c int, d int)")
-	testKit.MustExec("insert into t value(1,5,4,4),(3,4,1,8),(4,2,6,10),(6,7,2,5),(7,1,4,9),(8,9,8,3),(9,1,9,1),(10,6,6,2)")
-	testKit.MustExec("alter table t add index (b)")
-	testKit.MustExec("alter table t add index (d)")
-	testKit.MustExec(`analyze table t`)
-
-	ctx := context.Background()
-	h := s.do.StatsHandle()
-	tb, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
-	c.Assert(err, IsNil)
-	tblInfo := tb.Meta()
-	statsTbl := h.GetTableStats(tblInfo)
-
-	var (
-		input  []string
-		output []struct {
-			SQL         string
-			Selectivity float64
-		}
-	)
-	s.testData.GetTestCases(c, &input, &output)
-	for i, tt := range input {
-		sctx := testKit.Se.(sessionctx.Context)
-		stmts, err := session.Parse(sctx, tt)
-		c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt))
-		c.Assert(stmts, HasLen, 1)
-
-		ret := &plannercore.PreprocessorReturn{}
-		err = plannercore.Preprocess(sctx, stmts[0], plannercore.WithPreprocessorReturn(ret))
-		c.Assert(err, IsNil, Commentf("error %v, for sql %s", err, tt))
-		p, _, err := plannercore.BuildLogicalPlan(ctx, sctx, stmts[0], ret.InfoSchema)
-		c.Assert(err, IsNil, Commentf("error %v, for building plan, sql %s", err, tt))
-
-		sel := p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection)
-		ds := sel.Children()[0].(*plannercore.DataSource)
-
-		histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns)
-
-		ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil)
-		c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt))
-		s.testData.OnRecord(func() {
-			output[i].SQL = tt
-			output[i].Selectivity = ratio
-		})
-		c.Assert(math.Abs(ratio-output[i].Selectivity) < eps, IsTrue,
-			Commentf("for %s, needed: %v, got: %v", tt, output[i].Selectivity, ratio))
-	}
-
-	// Test issue 19981
-	testKit.MustExec("select * from t where _tidb_rowid is null or _tidb_rowid > 7")
-
-	// Test issue 22134
-	// Information about column n will not be in stats immediately after this SQL executed.
-	// If we don't have a check against this, DNF condition could lead to infinite recursion in Selectivity().
-	testKit.MustExec("alter table t add column n timestamp;")
-	testKit.MustExec("select * from t where n = '2000-01-01' or n = '2000-01-02';")
-
-	// Test issue 27294
+	testKit.MustExec("drop table if exists tt")
 	testKit.MustExec("create table tt (COL1 blob DEFAULT NULL,COL2 decimal(37,4) DEFAULT NULL,COL3 timestamp NULL DEFAULT NULL,COL4 int(11) DEFAULT NULL,UNIQUE KEY U_M_COL4(COL1(10),COL2), UNIQUE KEY U_M_COL5(COL3,COL2));")
 	testKit.MustExec("explain select * from tt where col1 is not null or col2 not between 454623814170074.2771 and -975540642273402.9269 and col3 not between '2039-1-19 10:14:57' and '2002-3-27 14:40:23';")
 }
-
-func (s *testStatsSuite) TestIndexEstimationCrossValidate(c *C) {
-	defer cleanEnv(c, s.store, s.do)
-	tk := testkit.NewTestKit(c, s.store)
-	tk.MustExec("use test")
-	tk.MustExec("drop table if exists t")
-	tk.MustExec("create table t(a int, b int, key(a,b))")
-	tk.MustExec("insert into t values(1, 1), (1, 2), (1, 3), (2, 2)")
-	tk.MustExec("analyze table t")
-	c.Assert(failpoint.Enable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64", `return(100000)`), IsNil)
-	tk.MustQuery("explain select * from t where a = 1 and b = 2").Check(testkit.Rows(
-		"IndexReader_6 1.00 root  index:IndexRangeScan_5",
-		"└─IndexRangeScan_5 1.00 cop[tikv] table:t, index:a(a, b) range:[1 2,1 2], keep order:false"))
-	c.Assert(failpoint.Disable("github.com/pingcap/tidb/statistics/table/mockQueryBytesMaxUint64"), IsNil)
-
-	// Test issue 22466
-	tk.MustExec("drop table if exists t2")
-	tk.MustExec("create table t2(a int, b int, key b(b))")
-	tk.MustExec("insert into t2 values(1, 1), (2, 2), (3, 3), (4, 4), (5,5)")
-	// This line of select will mark column b stats as needed, and an invalid(empty) stats for column b
-	// will be loaded at the next analyze line, this will trigger the bug.
-	tk.MustQuery("select * from t2 where b=2")
-	tk.MustExec("analyze table t2 index b")
-	tk.MustQuery("explain select * from t2 where b=2").Check(testkit.Rows(
-		"TableReader_7 1.00 root  data:Selection_6",
-		"└─Selection_6 1.00 cop[tikv]  eq(test.t2.b, 2)",
-		"  └─TableFullScan_5 5.00 cop[tikv] table:t2 keep order:false"))
-}
-
-func (s *testStatsSuite) TestRangeStepOverflow(c *C) {
-	defer cleanEnv(c, s.store, s.do)
-	tk := testkit.NewTestKit(c, s.store)
-	tk.MustExec("use test")
-	tk.MustExec("drop table if exists t")
-	tk.MustExec("create table t (col datetime)")
-	tk.MustExec("insert into t values('3580-05-26 07:16:48'),('4055-03-06 22:27:16'),('4862-01-26 07:16:54')")
-	h := s.do.StatsHandle()
-	c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil)
-	tk.MustExec("analyze table t")
-	// Trigger the loading of column stats.
-	tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows())
-	c.Assert(h.LoadNeededHistograms(), IsNil)
-	// Must execute successfully after loading the column stats.
-	tk.MustQuery("select * from t where col between '8499-1-23 2:14:38' and '9961-7-23 18:35:26'").Check(testkit.Rows())
-}
-
-func (s *testStatsSuite) TestSmallRangeEstimation(c *C) {
-	defer cleanEnv(c, s.store, s.do)
-	testKit := testkit.NewTestKit(c, s.store)
-	testKit.MustExec("use test")
-	testKit.MustExec("drop table if exists t")
-	testKit.MustExec("create table t(a int)")
-	for i := 0; i < 400; i++ {
-		testKit.MustExec(fmt.Sprintf("insert into t values (%v), (%v), (%v)", i, i, i)) // [0, 400)
-	}
-	testKit.MustExec("analyze table t with 0 topn")
-
-	h := s.do.StatsHandle()
-	table, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
-	c.Assert(err, IsNil)
-	statsTbl := h.GetTableStats(table.Meta())
-	sc := &stmtctx.StatementContext{}
-	col := statsTbl.Columns[table.Meta().Columns[0].ID]
-
-	var input []struct {
-		Start int64
-		End   int64
-	}
-	var output []struct {
-		Start int64
-		End   int64
-		Count float64
-	}
-	s.testData.GetTestCases(c, &input, &output)
-	for i, ran := range input {
-		count, err := col.GetColumnRowCount(sc, getRange(ran.Start, ran.End), statsTbl.Count, false)
-		c.Assert(err, IsNil)
-		s.testData.OnRecord(func() {
-			output[i].Start = ran.Start
-			output[i].End = ran.End
-			output[i].Count = count
-		})
-		c.Assert(math.Abs(count-output[i].Count) < eps, IsTrue, Commentf("for [%v, %v], needed: around %v, got: %v", ran.Start, ran.End, output[i].Count, count))
-	}
-}
->>>>>>> a0de91fa0... statistics: fix a error check to prevent nil dereference (#27295)