From 425d39e0abb6d7cf0bd353746e7d5469de0834d0 Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Thu, 20 Aug 2020 12:14:10 +0800
Subject: [PATCH 1/9] add bucket ndv for index histogram

---
 .../r/explain_complex_stats.result            |  16 +--
 cmd/explaintest/r/explain_easy_stats.result   |  22 +--
 cmd/explaintest/r/explain_join_stats.result   |  16 +--
 distsql/select_result.go                      |   3 +-
 distsql/stream.go                             |   2 +-
 executor/show_stats.go                        |   1 +
 go.mod                                        |   2 +-
 go.sum                                        |   4 +
 planner/core/cbo_test.go                      |   8 +-
 planner/core/planbuilder.go                   |   4 +-
 planner/core/testdata/analyze_suite_out.json  |  16 ++-
 .../integration_serial_suite_out.json         |   2 +-
 .../core/testdata/integration_suite_out.json  |   4 +-
 session/bootstrap.go                          |  11 ++
 session/session.go                            |   2 +-
 statistics/builder.go                         |   4 +-
 statistics/feedback.go                        |  74 +++++++---
 statistics/feedback_test.go                   | 127 +++++++++++-------
 statistics/handle/handle.go                   |   6 +-
 statistics/handle/update.go                   |  18 ++-
 statistics/handle/update_test.go              | 100 +++++++-------
 statistics/histogram.go                       |  59 +++++---
 statistics/statistics_test.go                 |   2 +-
 statistics/table.go                           |   3 +
 statistics/testdata/stats_suite_out.json      |   8 +-
 .../unistore/cophandler/closure_exec.go       |  31 +++++
 .../unistore/cophandler/cop_handler.go        |   7 +-
 27 files changed, 350 insertions(+), 202 deletions(-)

diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
index 6aac4462e7763..15274e5cafa51 100644
--- a/cmd/explaintest/r/explain_complex_stats.result
+++ b/cmd/explaintest/r/explain_complex_stats.result
@@ -115,14 +115,14 @@ PRIMARY KEY (aid,dic)
 load stats 's/explain_complex_stats_rr.json';
 explain SELECT ds, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(dic) as install_device FROM dt use index (cm) WHERE (ds >= '2016-09-01') AND (ds <= '2016-11-03') AND (cm IN ('1062', '1086', '1423', '1424', '1425', '1426', '1427', '1428', '1429', '1430', '1431', '1432', '1433', '1434', '1435', '1436', '1437', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1448', '1449', '1450', '1451', '1452', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1550', '1551', '1552', '1553', '1554', '1555', '1556', '1557', '1558', '1559', '1597', '1598', '1599', '1600', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1709', '1719', '1720', '1843', '2813', '2814', '2815', '2816', '2817', '2818', '2819', '2820', '2821', '2822', '2823', '2824', '2825', '2826', '2827', '2828', '2829', '2830', '2831', '2832', '2833', '2834', '2835', '2836', '2837', '2838', '2839', '2840', '2841', '2842', '2843', '2844', '2845', '2846', '2847', '2848', '2849', '2850', '2851', '2852', '2853', '2854', '2855', '2856', '2857', '2858', '2859', '2860', '2861', '2862', '2863', '2864', '2865', '2866', '2867', '2868', '2869', '2870', '2871', '2872', '3139', '3140', '3141', '3142', '3143', '3144', '3145', '3146', '3147', '3148', '3149', '3150', '3151', '3152', '3153', '3154', '3155', '3156', '3157', '3158', '3386', '3387', '3388', '3389', '3390', '3391', '3392', '3393', '3394', '3395', '3664', '3665', '3666', '3667', '3668', '3670', '3671', '3672', '3673', '3674', '3676', '3677', '3678', '3679', '3680', '3681', '3682', '3683', '3684', '3685', '3686', '3687', '3688', '3689', '3690', '3691', '3692', '3693', '3694', '3695', '3696', '3697', '3698', '3699', '3700', '3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714', '3715', '3960', '3961', '3962', '3963', '3964', '3965', '3966', '3967', '3968', '3978', '3979', '3980', '3981', '3982', '3983', '3984', '3985', '3986', '3987', '4208', '4209', '4210', '4211', '4212', '4304', '4305', '4306', '4307', '4308', '4866', '4867', '4868', '4869', '4870', '4871', '4872', '4873', '4874', '4875')) GROUP BY ds, p1, p2, p3, p4, p5, p6_md5, p7_md5 ORDER BY ds2 DESC;
 id	estRows	task	access object	operator info
-Projection_7	21.53	root		test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21
-└─Sort_8	21.53	root		test.dt.ds2:desc
-  └─HashAgg_16	21.53	root		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5
-    └─IndexLookUp_17	21.53	root		
-      ├─IndexRangeScan_13(Build)	128.32	cop[tikv]	table:dt, index:cm(cm)	range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false
-      └─HashAgg_11(Probe)	21.53	cop[tikv]		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34
-        └─Selection_15	21.56	cop[tikv]		ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000)
-          └─TableRowIDScan_14	128.32	cop[tikv]	table:dt	keep order:false
+Projection_7	308.93	root		test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21
+└─Sort_8	308.93	root		test.dt.ds2:desc
+  └─HashAgg_16	308.93	root		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5
+    └─IndexLookUp_17	308.93	root		
+      ├─IndexRangeScan_13(Build)	1841.60	cop[tikv]	table:dt, index:cm(cm)	range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false
+      └─HashAgg_11(Probe)	308.93	cop[tikv]		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34
+        └─Selection_15	309.39	cop[tikv]		ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000)
+          └─TableRowIDScan_14	1841.60	cop[tikv]	table:dt	keep order:false
 explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext, gad.t as gtime from st gad join (select id, aid, pt, dic, ip, t from dd where pt = 'android' and bm = 0 and t > 1478143908) sdk on  gad.aid = sdk.aid and gad.ip = sdk.ip and sdk.t > gad.t where gad.t > 1478143908 and gad.bm = 0 and gad.pt = 'android' group by gad.aid, sdk.dic limit 2500;
 id	estRows	task	access object	operator info
 Projection_13	424.00	root		test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result
index fd40354ca817e..35ee16645a364 100644
--- a/cmd/explaintest/r/explain_easy_stats.result
+++ b/cmd/explaintest/r/explain_easy_stats.result
@@ -42,16 +42,16 @@ TableReader_6	1999.00	root		data:TableRangeScan_5
 └─TableRangeScan_5	1999.00	cop[tikv]	table:t1	range:(0,+inf], keep order:false
 explain select t1.c1, t1.c2 from t1 where t1.c2 = 1;
 id	estRows	task	access object	operator info
-IndexReader_6	0.00	root		index:IndexRangeScan_5
-└─IndexRangeScan_5	0.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
+IndexReader_6	8.00	root		index:IndexRangeScan_5
+└─IndexRangeScan_5	8.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
 explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	estRows	task	access object	operator info
 HashJoin_22	2481.25	root		left outer join, equal:[eq(test.t1.c2, test.t2.c1)]
 ├─TableReader_36(Build)	1985.00	root		data:Selection_35
 │ └─Selection_35	1985.00	cop[tikv]		not(isnull(test.t2.c1))
 │   └─TableFullScan_34	1985.00	cop[tikv]	table:t2	keep order:false
-└─TableReader_33(Probe)	1998.00	root		data:TableRangeScan_32
-  └─TableRangeScan_32	1998.00	cop[tikv]	table:t1	range:(1,+inf], keep order:false
+└─TableReader_33(Probe)	1991.00	root		data:TableRangeScan_32
+  └─TableRangeScan_32	1991.00	cop[tikv]	table:t1	range:(1,+inf], keep order:false
 explain update t1 set t1.c2 = 2 where t1.c1 = 1;
 id	estRows	task	access object	operator info
 Update_2	N/A	root		N/A
@@ -59,9 +59,9 @@ Update_2	N/A	root		N/A
 explain delete from t1 where t1.c2 = 1;
 id	estRows	task	access object	operator info
 Delete_4	N/A	root		N/A
-└─IndexLookUp_11	0.00	root		
-  ├─IndexRangeScan_9(Build)	0.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
-  └─TableRowIDScan_10(Probe)	0.00	cop[tikv]	table:t1	keep order:false
+└─IndexLookUp_11	8.00	root		
+  ├─IndexRangeScan_9(Build)	8.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
+  └─TableRowIDScan_10(Probe)	8.00	cop[tikv]	table:t1	keep order:false
 explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1;
 id	estRows	task	access object	operator info
 Projection_11	1985.00	root		cast(Column#8, bigint(21) BINARY)->Column#7
@@ -80,10 +80,10 @@ TopN_7	1.00	root		test.t2.c2, offset:0, count:1
     └─TableFullScan_13	1985.00	cop[tikv]	table:t2	keep order:false
 explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1;
 id	estRows	task	access object	operator info
-IndexLookUp_11	0.00	root		
-├─IndexRangeScan_8(Build)	0.00	cop[tikv]	table:t1, index:c2(c2)	range:(1 1,1 +inf], keep order:false
-└─Selection_10(Probe)	0.00	cop[tikv]		lt(test.t1.c3, 1)
-  └─TableRowIDScan_9	0.00	cop[tikv]	table:t1	keep order:false
+IndexLookUp_11	0.51	root		
+├─IndexRangeScan_8(Build)	1.00	cop[tikv]	table:t1, index:c2(c2)	range:(1 1,1 +inf], keep order:false
+└─Selection_10(Probe)	0.51	cop[tikv]		lt(test.t1.c3, 1)
+  └─TableRowIDScan_9	1.00	cop[tikv]	table:t1	keep order:false
 explain select * from t1 where c1 = 1 and c2 > 1;
 id	estRows	task	access object	operator info
 Selection_6	0.50	root		gt(test.t1.c2, 1)
diff --git a/cmd/explaintest/r/explain_join_stats.result b/cmd/explaintest/r/explain_join_stats.result
index 73bb6e2671ba4..4686025ea6703 100644
--- a/cmd/explaintest/r/explain_join_stats.result
+++ b/cmd/explaintest/r/explain_join_stats.result
@@ -7,21 +7,21 @@ load stats 's/explain_join_stats_lo.json';
 explain select count(*) from e, lo where lo.a=e.a and e.b=22336;
 id	estRows	task	access object	operator info
 StreamAgg_13	1.00	root		funcs:count(1)->Column#5
-└─HashJoin_89	19977.00	root		inner join, equal:[eq(test.lo.a, test.e.a)]
+└─HashJoin_89	20044.00	root		inner join, equal:[eq(test.lo.a, test.e.a)]
   ├─TableReader_50(Build)	250.00	root		data:TableFullScan_49
   │ └─TableFullScan_49	250.00	cop[tikv]	table:lo	keep order:false
-  └─IndexLookUp_61(Probe)	19977.00	root		
-    ├─IndexRangeScan_58(Build)	19977.00	cop[tikv]	table:e, index:idx_b(b)	range:[22336,22336], keep order:false
-    └─Selection_60(Probe)	19977.00	cop[tikv]		not(isnull(test.e.a))
-      └─TableRowIDScan_59	19977.00	cop[tikv]	table:e	keep order:false
+  └─IndexLookUp_61(Probe)	20044.00	root		
+    ├─IndexRangeScan_58(Build)	20044.00	cop[tikv]	table:e, index:idx_b(b)	range:[22336,22336], keep order:false
+    └─Selection_60(Probe)	20044.00	cop[tikv]		not(isnull(test.e.a))
+      └─TableRowIDScan_59	20044.00	cop[tikv]	table:e	keep order:false
 explain select /*+ TIDB_INLJ(e) */ count(*) from e, lo where lo.a=e.a and e.b=22336;
 id	estRows	task	access object	operator info
 StreamAgg_12	1.00	root		funcs:count(1)->Column#5
-└─IndexJoin_56	19977.00	root		inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a
+└─IndexJoin_56	20044.00	root		inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a
   ├─TableReader_40(Build)	250.00	root		data:TableFullScan_39
   │ └─TableFullScan_39	250.00	cop[tikv]	table:lo	keep order:false
-  └─IndexLookUp_55(Probe)	79.91	root		
+  └─IndexLookUp_55(Probe)	80.18	root		
     ├─Selection_53(Build)	4080.00	cop[tikv]		not(isnull(test.e.a))
     │ └─IndexRangeScan_51	4080.00	cop[tikv]	table:e, index:idx_a(a)	range: decided by [eq(test.e.a, test.lo.a)], keep order:false
-    └─Selection_54(Probe)	79.91	cop[tikv]		eq(test.e.b, 22336)
+    └─Selection_54(Probe)	80.18	cop[tikv]		eq(test.e.b, 22336)
       └─TableRowIDScan_52	4080.00	cop[tikv]	table:e	keep order:false
diff --git a/distsql/select_result.go b/distsql/select_result.go
index 02d7dc77acf05..f177fd3632106 100644
--- a/distsql/select_result.go
+++ b/distsql/select_result.go
@@ -139,7 +139,8 @@ func (r *selectResult) fetchResp(ctx context.Context) error {
 		for _, warning := range r.selectResp.Warnings {
 			sc.AppendWarning(terror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg))
 		}
-		r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts)
+		logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs))
+		r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs)
 		r.partialCount++
 
 		hasStats, ok := resultSubset.(CopRuntimeStats)
diff --git a/distsql/stream.go b/distsql/stream.go
index f1817084cdf44..c5618d95a03f9 100644
--- a/distsql/stream.go
+++ b/distsql/stream.go
@@ -104,7 +104,7 @@ func (r *streamResult) readDataFromResponse(ctx context.Context, resp kv.Respons
 	if err != nil {
 		return false, errors.Trace(err)
 	}
-	r.feedback.Update(resultSubset.GetStartKey(), stream.OutputCounts)
+	r.feedback.Update(resultSubset.GetStartKey(), stream.OutputCounts, stream.Ndvs)
 	r.partialCount++
 
 	hasStats, ok := resultSubset.(CopRuntimeStats)
diff --git a/executor/show_stats.go b/executor/show_stats.go
index 0c1461821528c..da05837f61dc2 100644
--- a/executor/show_stats.go
+++ b/executor/show_stats.go
@@ -181,6 +181,7 @@ func (e *ShowExec) bucketsToRows(dbName, tblName, partitionName, colName string,
 			hist.Buckets[i].Repeat,
 			lowerBoundStr,
 			upperBoundStr,
+			hist.Buckets[i].NDV,
 		})
 	}
 	return nil
diff --git a/go.mod b/go.mod
index 1e8246c97c058..e63359ec62781 100644
--- a/go.mod
+++ b/go.mod
@@ -40,7 +40,7 @@ require (
 	github.com/pingcap/pd/v4 v4.0.0-rc.2.0.20200730093003-dc8c75cf7ca0
 	github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a
 	github.com/pingcap/tidb-tools v4.0.1+incompatible
-	github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3
+	github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611
 	github.com/prometheus/client_golang v1.5.1
 	github.com/prometheus/client_model v0.2.0
 	github.com/prometheus/common v0.9.1
diff --git a/go.sum b/go.sum
index b7c779284d851..781d0774daf9f 100644
--- a/go.sum
+++ b/go.sum
@@ -516,6 +516,10 @@ github.com/pingcap/tipb v0.0.0-20200604070248-508f03b0b342/go.mod h1:RtkHW8WbcNx
 github.com/pingcap/tipb v0.0.0-20200615034523-dcfcea0b5965/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3 h1:ESL3eIt1kUt8IMvR1011ejZlAyDcOzw89ARvVHvpD5k=
 github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
+github.com/pingcap/tipb v0.0.0-20200819200035-714bd87bf361 h1:HSe6jRjauAbijvoQdJO/xOdXAY5gUYLpLO0G5HyBHzg=
+github.com/pingcap/tipb v0.0.0-20200819200035-714bd87bf361/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
+github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611 h1:GVmsE4VK2NZK0v2j2xUQoba+UDMh4/da+ScIJBjd1og=
+github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go
index 781952adf3e05..2c5ca439d28dc 100644
--- a/planner/core/cbo_test.go
+++ b/planner/core/cbo_test.go
@@ -552,10 +552,10 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) {
 	// the `a = 5 and c = 5` will get 10, it is not consistent.
 	tk.MustQuery("explain select * from t use index(ab) where a = 5 and c = 5").
 		Check(testkit.Rows(
-			"IndexLookUp_8 10.00 root  ",
-			"├─IndexRangeScan_5(Build) 12.50 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false",
-			"└─Selection_7(Probe) 10.00 cop[tikv]  eq(test.t.c, 5)",
-			"  └─TableRowIDScan_6 12.50 cop[tikv] table:t keep order:false",
+			"IndexLookUp_8 7.00 root  ",
+			"├─IndexRangeScan_5(Build) 8.75 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false",
+			"└─Selection_7(Probe) 7.00 cop[tikv]  eq(test.t.c, 5)",
+			"  └─TableRowIDScan_6 8.75 cop[tikv] table:t keep order:false",
 		))
 }
 
diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go
index 7e7ab31489c0e..2dbd09dbac545 100644
--- a/planner/core/planbuilder.go
+++ b/planner/core/planbuilder.go
@@ -3316,9 +3316,9 @@ func buildShowSchema(s *ast.ShowStmt, isView bool, isSequence bool) (schema *exp
 			mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeDouble, mysql.TypeDouble}
 	case ast.ShowStatsBuckets:
 		names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Bucket_id", "Count",
-			"Repeats", "Lower_Bound", "Upper_Bound"}
+			"Repeats", "Lower_Bound", "Upper_Bound", "Ndv"}
 		ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeLonglong,
-			mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar}
+			mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong}
 	case ast.ShowStatsHealthy:
 		names = []string{"Db_name", "Table_name", "Partition_name", "Healthy"}
 		ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong}
diff --git a/planner/core/testdata/analyze_suite_out.json b/planner/core/testdata/analyze_suite_out.json
index 001454fbce6b5..02700cacf886e 100644
--- a/planner/core/testdata/analyze_suite_out.json
+++ b/planner/core/testdata/analyze_suite_out.json
@@ -306,7 +306,7 @@
       "IndexReader(Index(t.e)[[-inf,10]]->StreamAgg)->StreamAgg",
       "IndexReader(Index(t.e)[[-inf,50]]->StreamAgg)->StreamAgg",
       "IndexReader(Index(t.b_c)[[NULL,+inf]]->Sel([gt(test.t.c, 1)])->HashAgg)->HashAgg",
-      "IndexLookUp(Index(t.e)[[1,1]], Table(t))->HashAgg",
+      "IndexLookUp(Index(t.e)[[1,1]], Table(t)->HashAgg)->HashAgg",
       "TableReader(Table(t)->Sel([gt(test.t.e, 1)])->HashAgg)->HashAgg",
       "IndexLookUp(Index(t.b)[[-inf,20]], Table(t)->HashAgg)->HashAgg",
       "TableReader(Table(t)->Sel([le(test.t.b, 30)])->StreamAgg)->StreamAgg",
@@ -347,18 +347,20 @@
       {
         "SQL": "explain select * from t where a = 7639902",
         "Plan": [
-          "IndexReader_6 6.68 root  index:IndexRangeScan_5",
-          "└─IndexRangeScan_5 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
+          "IndexReader_6 499061.16 root  index:IndexRangeScan_5",
+          "└─IndexRangeScan_5 499061.16 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
         ]
       },
       {
         "SQL": "explain select c, b from t where a = 7639902 order by b asc limit 6",
         "Plan": [
           "Projection_7 6.00 root  test.t.c, test.t.b",
-          "└─TopN_8 6.00 root  test.t.b, offset:0, count:6",
-          "  └─IndexReader_16 6.00 root  index:TopN_15",
-          "    └─TopN_15 6.00 cop[tikv]  test.t.b, offset:0, count:6",
-          "      └─IndexRangeScan_14 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
+          "└─Limit_12 6.00 root  offset:0, count:6",
+          "  └─Projection_23 6.00 root  test.t.a, test.t.b, test.t.c",
+          "    └─IndexLookUp_22 6.00 root  ",
+          "      ├─IndexFullScan_19(Build) 600.00 cop[tikv] table:t, index:b(b) keep order:true",
+          "      └─Selection_21(Probe) 6.00 cop[tikv]  eq(test.t.a, 7639902)",
+          "        └─TableRowIDScan_20 600.00 cop[tikv] table:t keep order:false"
         ]
       }
     ]
diff --git a/planner/core/testdata/integration_serial_suite_out.json b/planner/core/testdata/integration_serial_suite_out.json
index b3be7bdeeab4f..6aee43d9d27e1 100644
--- a/planner/core/testdata/integration_serial_suite_out.json
+++ b/planner/core/testdata/integration_serial_suite_out.json
@@ -29,7 +29,7 @@
           "StreamAgg_32 1.00 root  funcs:count(Column#14)->Column#11",
           "└─TableReader_33 1.00 root  data:StreamAgg_13",
           "  └─StreamAgg_13 1.00 cop[tiflash]  funcs:count(1)->Column#14",
-	  "    └─BroadcastJoin_31 8.00 cop[tiflash]  inner join, left key:test.fact_t.d1_k, right key:test.d1_t.d1_k",
+          "    └─BroadcastJoin_31 8.00 cop[tiflash]  inner join, left key:test.fact_t.d1_k, right key:test.d1_t.d1_k",
           "      ├─Selection_23(Build) 2.00 cop[tiflash]  not(isnull(test.d1_t.d1_k))",
           "      │ └─TableFullScan_22 2.00 cop[tiflash] table:d1_t keep order:false, global read",
           "      └─Selection_21(Probe) 8.00 cop[tiflash]  not(isnull(test.fact_t.d1_k))",
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index 7480a3dccd376..fedaa72a6b84c 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -857,8 +857,8 @@
       {
         "SQL": "select * from t1 where t1.a = 1 and t1.b < \"333\"",
         "Plan": [
-          "TableReader_6 0.67 root  data:TableRangeScan_5",
-          "└─TableRangeScan_5 0.67 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false"
+          "TableReader_6 1.00 root  data:TableRangeScan_5",
+          "└─TableRangeScan_5 1.00 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false"
         ],
         "Res": [
           "1 111 1.1000000000 11"
diff --git a/session/bootstrap.go b/session/bootstrap.go
index 95dfff6586123..af7b42c8ffd51 100644
--- a/session/bootstrap.go
+++ b/session/bootstrap.go
@@ -198,6 +198,7 @@ const (
 		repeats bigint(64) NOT NULL,
 		upper_bound blob NOT NULL,
 		lower_bound blob ,
+		ndv bigint NOT NULL DEFAULT 0,
 		unique index tbl(table_id, is_index, hist_id, bucket_id)
 	);`
 
@@ -423,6 +424,8 @@ const (
 	version49 = 49
 	// version50 add mysql.schema_index_usage table.
 	version50 = 50
+	// version51 add column ndv for mysql.stats_buckets.
+	version51 = 51
 )
 
 var (
@@ -476,6 +479,7 @@ var (
 		upgradeToVer48,
 		upgradeToVer49,
 		upgradeToVer50,
+		upgradeToVer51,
 	}
 )
 
@@ -1170,6 +1174,13 @@ func upgradeToVer50(s Session, ver int64) {
 	doReentrantDDL(s, CreateSchemaIndexUsageTable)
 }
 
+func upgradeToVer51(s Session, ver int64) {
+	if ver >= version51 {
+		return
+	}
+	doReentrantDDL(s, "ALTER TABLE mysql.stats_buckets ADD COLUMN `ndv` bigint not null default 0", infoschema.ErrColumnExists)
+}
+
 // updateBootstrapVer updates bootstrap version variable in mysql.TiDB table.
 func updateBootstrapVer(s Session) {
 	// Update bootstrap version.
diff --git a/session/session.go b/session/session.go
index 440d9b684066d..01b5b118ae053 100644
--- a/session/session.go
+++ b/session/session.go
@@ -1948,7 +1948,7 @@ func CreateSessionWithDomain(store kv.Storage, dom *domain.Domain) (*session, er
 
 const (
 	notBootstrapped         = 0
-	currentBootstrapVersion = version50
+	currentBootstrapVersion = version51
 )
 
 func getStoreBootstrapVersion(store kv.Storage) int64 {
diff --git a/statistics/builder.go b/statistics/builder.go
index 45ae0a5678c3f..8168c64acfb24 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -50,7 +50,7 @@ func (b *SortedBuilder) Hist() *Histogram {
 func (b *SortedBuilder) Iterate(data types.Datum) error {
 	b.Count++
 	if b.Count == 1 {
-		b.hist.AppendBucket(&data, &data, 1, 1)
+		b.hist.AppendBucketWithNDV(&data, &data, 1, 1, 1)
 		b.hist.NDV = 1
 		return nil
 	}
@@ -86,7 +86,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error {
 		} else {
 			b.lastNumber = b.hist.Buckets[b.bucketIdx].Count
 			b.bucketIdx++
-			b.hist.AppendBucket(&data, &data, b.lastNumber+1, 1)
+			b.hist.AppendBucketWithNDV(&data, &data, b.lastNumber+1, 1, 1)
 		}
 		b.hist.NDV++
 	}
diff --git a/statistics/feedback.go b/statistics/feedback.go
index 545d5875049f0..2ab6724ca6c33 100644
--- a/statistics/feedback.go
+++ b/statistics/feedback.go
@@ -44,6 +44,7 @@ type Feedback struct {
 	Upper  *types.Datum
 	Count  int64
 	Repeat int64
+	Ndv    int64
 }
 
 // QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows
@@ -236,7 +237,7 @@ func (q *QueryFeedback) DecodeIntValues() *QueryFeedback {
 func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range) {
 	q.Feedback = make([]Feedback, 0, len(ranges))
 	for _, ran := range ranges {
-		q.Feedback = append(q.Feedback, Feedback{&ran.LowVal[0], &ran.HighVal[0], 0, 0})
+		q.Feedback = append(q.Feedback, Feedback{&ran.LowVal[0], &ran.HighVal[0], 0, 0, 0})
 	}
 }
 
@@ -258,7 +259,7 @@ func (q *QueryFeedback) Actual() int64 {
 
 // Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find
 // the range for update. `counts` is the scan counts of each range, used to update the feedback count info.
-func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) {
+func (q *QueryFeedback) Update(startKey kv.Key, counts, ndvs []int64) {
 	// Older versions do not have the counts info.
 	if len(counts) == 0 {
 		q.Invalidate()
@@ -292,6 +293,11 @@ func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) {
 		for i := 0; i < len(counts)/2; i++ {
 			j := len(counts) - i - 1
 			counts[i], counts[j] = counts[j], counts[i]
 		}
+		// Reverse ndvs by its own length: it is indexed in parallel with counts
+		// below, but nothing here guarantees the two slices are equally long.
+		for i, j := 0, len(ndvs)-1; i < j; i, j = i+1, j-1 {
+			ndvs[i], ndvs[j] = ndvs[j], ndvs[i]
+		}
 	}
 	// Update the feedback count info.
@@ -301,6 +307,10 @@ func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) {
 			break
 		}
 		q.Feedback[i+idx].Count += count
+		// ndvs may be shorter than counts (or empty); keep the count and skip the ndv.
+		if i < len(ndvs) {
+			q.Feedback[i+idx].Ndv += ndvs[i]
+		}
 	}
 }
 
@@ -503,23 +506,26 @@ type bucket = Feedback
 // calculates the count for each new bucket, merge the new bucket whose count
 // is smaller than "minBucketFraction*totalCount" with the next new bucket
 // until the last new bucket.
-func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originBucketCount float64) []bucket {
+func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originBucketCount float64, originalNdv int64) []bucket {
 	// Split the bucket.
 	bounds := b.getBoundaries(newNumBkts + 1)
 	bkts := make([]bucket, 0, len(bounds)-1)
 	sc := &stmtctx.StatementContext{TimeZone: time.UTC}
 	for i := 1; i < len(bounds); i++ {
-		newBkt := bucket{&bounds[i-1], bounds[i].Clone(), 0, 0}
+		newBkt := bucket{&bounds[i-1], bounds[i].Clone(), 0, 0, 0}
 		// get bucket count
-		_, ratio := getOverlapFraction(Feedback{b.lower, b.upper, int64(originBucketCount), 0}, newBkt)
+		_, ratio := getOverlapFraction(Feedback{b.lower, b.upper, int64(originBucketCount), 0, 0}, newBkt)
 		countInNewBkt := originBucketCount * ratio
-		countInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt)
+		ndvInNewBkt := int64(float64(originalNdv) * ratio)
+		countInNewBkt, ndvInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt, ndvInNewBkt)
+		log.Warn("split bucket", zap.Float64("count", countInNewBkt), zap.Int64("ndv", ndvInNewBkt))
 		// do not split if the count of result bucket is too small.
 		if countInNewBkt < minBucketFraction*totalCount {
 			bounds[i] = bounds[i-1]
 			continue
 		}
 		newBkt.Count = int64(countInNewBkt)
+		newBkt.Ndv = ndvInNewBkt
 		bkts = append(bkts, newBkt)
 		// To guarantee that each bucket's range will not overlap.
 		setNextValue(&bounds[i])
@@ -556,45 +562,51 @@ func getOverlapFraction(fb Feedback, bkt bucket) (float64, float64) {
 }
 
 // mergeFullyContainedFeedback merges the max fraction of non-overlapped feedbacks that are fully contained in the bucket.
-func (b *BucketFeedback) mergeFullyContainedFeedback(sc *stmtctx.StatementContext, bkt bucket) (float64, float64, bool) {
+func (b *BucketFeedback) mergeFullyContainedFeedback(sc *stmtctx.StatementContext, bkt bucket) (float64, float64, int64, bool) {
 	feedbacks := make([]Feedback, 0, len(b.feedback))
 	// Get all the fully contained feedbacks.
 	for _, fb := range b.feedback {
 		res, err := outOfRange(sc, bkt.Lower, bkt.Upper, fb.Lower)
 		if res != 0 || err != nil {
-			return 0, 0, false
+			return 0, 0, 0, false
 		}
 		res, err = outOfRange(sc, bkt.Lower, bkt.Upper, fb.Upper)
 		if res != 0 || err != nil {
-			return 0, 0, false
+			return 0, 0, 0, false
 		}
 		feedbacks = append(feedbacks, fb)
 	}
 	if len(feedbacks) == 0 {
-		return 0, 0, false
+		return 0, 0, 0, false
 	}
 	sortedFBs, ok := NonOverlappedFeedbacks(sc, feedbacks)
 	if !ok {
-		return 0, 0, false
+		return 0, 0, 0, false
 	}
-	var sumFraction, sumCount float64
+	var (
+		sumFraction, sumCount float64
+		ndv                   int64
+	)
 	for _, fb := range sortedFBs {
 		fraction, _ := getOverlapFraction(fb, bkt)
 		sumFraction += fraction
 		sumCount += float64(fb.Count)
+		ndv += fb.Ndv
 	}
-	return sumFraction, sumCount, true
+	return sumFraction, sumCount, ndv, true
 }
 
 // refineBucketCount refine the newly split bucket count. It uses the feedback that overlaps most
 // with the bucket to get the bucket count.
-func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt bucket, defaultCount float64) float64 {
+func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt bucket, defaultCount float64, defaultNdv int64) (float64, int64) {
 	bestFraction := minBucketFraction
 	count := defaultCount
-	sumFraction, sumCount, ok := b.mergeFullyContainedFeedback(sc, bkt)
+	ndv := defaultNdv
+	sumFraction, sumCount, sumNdv, ok := b.mergeFullyContainedFeedback(sc, bkt)
 	if ok && sumFraction > bestFraction {
 		bestFraction = sumFraction
 		count = sumCount / sumFraction
+		ndv = int64(float64(sumNdv) / sumFraction)
 	}
 	for _, fb := range b.feedback {
 		fraction, ratio := getOverlapFraction(fb, bkt)
@@ -602,9 +614,10 @@ func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt buc
 		if fraction > bestFraction {
 			bestFraction = fraction
 			count = float64(fb.Count) * ratio
+			ndv = int64(float64(fb.Ndv) * ratio)
 		}
 	}
-	return count
+	return count, ndv
 }
 
 const (
@@ -685,6 +698,7 @@ func mergeBuckets(bkts []bucket, isNewBuckets []bool, totalCount float64) []buck
 			bkts[bktCursor-1].Upper = bkts[i].Upper
 			bkts[bktCursor-1].Count += bkts[i].Count
 			bkts[bktCursor-1].Repeat = bkts[i].Repeat
+			bkts[bktCursor-1].Ndv += bkts[i].Ndv
 			idCursor++
 		} else {
 			bkts[bktCursor] = bkts[i]
@@ -705,13 +719,13 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6
 		bktFB, ok := bktID2FB[i]
 		// No feedback, just use the original one.
 		if !ok {
-			buckets = append(buckets, bucket{h.GetLower(i), h.GetUpper(i), h.bucketCount(i), h.Buckets[i].Repeat})
+			buckets = append(buckets, bucket{h.GetLower(i), h.GetUpper(i), h.bucketCount(i), h.Buckets[i].Repeat, h.Buckets[i].NDV})
 			isNewBuckets = append(isNewBuckets, false)
 			continue
 		}
 		// Distribute the total split count to bucket based on number of bucket feedback.
 		newBktNums := splitCount * len(bktFB.feedback) / numTotalFBs
-		bkts := bktFB.splitBucket(newBktNums, h.TotalRowCount(), float64(h.bucketCount(i)))
+		bkts := bktFB.splitBucket(newBktNums, h.TotalRowCount(), float64(h.bucketCount(i)), h.Buckets[i].NDV)
 		buckets = append(buckets, bkts...)
 		if len(bkts) == 1 {
 			isNewBuckets = append(isNewBuckets, false)
@@ -731,11 +745,22 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6
 // UpdateHistogram updates the histogram according buckets.
 func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram {
 	buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
+	ndvs := make([]int64, len(buckets))
+	for i := range buckets {
+		ndvs[i] = buckets[i].Ndv
+	}
+	log.Warn("update hist", zap.Int64s("ndvs", ndvs))
 	buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount))
 	hist := buildNewHistogram(h, buckets)
 	// Update the NDV of primary key column.
 	if feedback.Tp == PkType {
 		hist.NDV = int64(hist.TotalRowCount())
+	} else if feedback.Tp == IndexType {
+		totNdv := int64(0)
+		for _, bkt := range buckets {
+			totNdv += bkt.Ndv
+		}
+		hist.NDV = totNdv
 	}
 	return hist
 }
@@ -756,7 +781,7 @@ func buildNewHistogram(h *Histogram, buckets []bucket) *Histogram {
 	hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.Tp, len(buckets), h.TotColSize)
 	preCount := int64(0)
 	for _, bkt := range buckets {
-		hist.AppendBucket(bkt.Lower, bkt.Upper, bkt.Count+preCount, bkt.Repeat)
+		hist.AppendBucketWithNDV(bkt.Lower, bkt.Upper, bkt.Count+preCount, bkt.Repeat, bkt.Ndv)
 		preCount += bkt.Count
 	}
 	return hist
@@ -775,6 +800,8 @@ type queryFeedback struct {
 	// After that, it stores the Ranges for `HashValues`.
 	Counts       []int64
 	ColumnRanges [][]byte
+
+	Ndvs []int64
 }
 
 func encodePKFeedback(q *QueryFeedback) (*queryFeedback, error) {
@@ -794,6 +821,7 @@ func encodePKFeedback(q *QueryFeedback) (*queryFeedback, error) {
 		}
 		pb.IntRanges = append(pb.IntRanges, low, high)
 		pb.Counts = append(pb.Counts, fb.Count)
+		pb.Ndvs = append(pb.Ndvs, fb.Ndv)
 	}
 	return pb, nil
 }
@@ -805,9 +833,11 @@ func encodeIndexFeedback(q *QueryFeedback) *queryFeedback {
 		if bytes.Compare(kv.Key(fb.Lower.GetBytes()).PrefixNext(), fb.Upper.GetBytes()) >= 0 {
 			pb.IndexPoints = append(pb.IndexPoints, fb.Lower.GetBytes())
 			pointCounts = append(pointCounts, fb.Count)
+			pb.Ndvs = append(pb.Ndvs, fb.Ndv)
 		} else {
 			pb.IndexRanges = append(pb.IndexRanges, fb.Lower.GetBytes(), fb.Upper.GetBytes())
 			pb.Counts = append(pb.Counts, fb.Count)
+			pb.Ndvs = append(pb.Ndvs, fb.Ndv)
 		}
 	}
 	pb.Counts = append(pb.Counts, pointCounts...)
@@ -858,7 +888,12 @@ func decodeFeedbackForIndex(q *QueryFeedback, pb *queryFeedback, c *CMSketch) {
 	// decode the index range feedback
 	for i := 0; i < len(pb.IndexRanges); i += 2 {
 		lower, upper := types.NewBytesDatum(pb.IndexRanges[i]), types.NewBytesDatum(pb.IndexRanges[i+1])
-		q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0})
+		// pb.Ndvs can be shorter than pb.Counts (e.g. feedback encoded before the
+		// Ndvs field existed); fall back to 0 instead of indexing out of range.
+		var ndv int64
+		if i/2 < len(pb.Ndvs) {
+			ndv = pb.Ndvs[i/2]
+		}
+		q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0, ndv})
 	}
 	if c != nil {
 		// decode the index point feedback, just set value count in CM Sketch
@@ -891,7 +921,12 @@ func decodeFeedbackForPK(q *QueryFeedback, pb *queryFeedback, isUnsigned bool) {
 			lower.SetInt64(pb.IntRanges[i])
 			upper.SetInt64(pb.IntRanges[i+1])
 		}
-		q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0})
+		// pb.Ndvs can be shorter than pb.Counts (e.g. feedback encoded before the
+		// Ndvs field existed); fall back to 0 instead of indexing out of range.
+		var ndv int64
+		if i/2 < len(pb.Ndvs) {
+			ndv = pb.Ndvs[i/2]
+		}
+		q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0, ndv})
 	}
 }
 
@@ -930,7 +960,7 @@ func decodeFeedbackForColumn(q *QueryFeedback, pb *queryFeedback, ft *types.Fiel
 		if err != nil {
 			return err
 		}
-		q.Feedback = append(q.Feedback, Feedback{&low[0], &high[0], pb.Counts[i/2], 0})
+		q.Feedback = append(q.Feedback, Feedback{&low[0], &high[0], pb.Counts[i/2], 0, 0})
 	}
 	return nil
 }
diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go
index 1d778660171ba..f42f7f57ca730 100644
--- a/statistics/feedback_test.go
+++ b/statistics/feedback_test.go
@@ -17,9 +17,11 @@ import (
 	"bytes"
 
 	. "github.com/pingcap/check"
+	"github.com/pingcap/log"
 	"github.com/pingcap/parser/mysql"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/codec"
+	"go.uber.org/zap"
 )
 
 var _ = Suite(&testFeedbackSuite{})
@@ -27,22 +29,22 @@ var _ = Suite(&testFeedbackSuite{})
 type testFeedbackSuite struct {
 }
 
-func newFeedback(lower, upper, count int64) Feedback {
+func newFeedback(lower, upper, count, ndv int64) Feedback {
 	low, upp := types.NewIntDatum(lower), types.NewIntDatum(upper)
-	return Feedback{&low, &upp, count, 0}
+	return Feedback{&low, &upp, count, 0, ndv}
 }
 
 func genFeedbacks(lower, upper int64) []Feedback {
 	var feedbacks []Feedback
 	for i := lower; i < upper; i++ {
-		feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1))
+		feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1, upper-i+1))
 	}
 	return feedbacks
 }
 
 func appendBucket(h *Histogram, l, r int64) {
 	lower, upper := types.NewIntDatum(l), types.NewIntDatum(r)
-	h.AppendBucket(&lower, &upper, 0, 0)
+	h.AppendBucketWithNDV(&lower, &upper, 0, 0, 0)
 }
 
 func genHistogram() *Histogram {
@@ -57,11 +59,11 @@ func genHistogram() *Histogram {
 
 func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
 	feedbacks := []Feedback{
-		newFeedback(0, 1, 10000),
-		newFeedback(1, 2, 1),
-		newFeedback(2, 3, 3),
-		newFeedback(4, 5, 2),
-		newFeedback(5, 7, 4),
+		newFeedback(0, 1, 10000, 1),
+		newFeedback(1, 2, 1, 1),
+		newFeedback(2, 3, 3, 1),
+		newFeedback(4, 5, 2, 1),
+		newFeedback(5, 7, 4, 1),
 	}
 	feedbacks = append(feedbacks, genFeedbacks(8, 20)...)
 	feedbacks = append(feedbacks, genFeedbacks(21, 60)...)
@@ -73,50 +75,64 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
 	defer func() { defaultBucketCount = originBucketCount }()
 	c.Assert(UpdateHistogram(q.Hist, q).ToString(0), Equals,
 		"column:0 ndv:10053 totColSize:0\n"+
-			"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+
-			"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+
-			"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
-			"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+
-			"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+
-			"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0")
+			"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+
+			"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 2\n"+
+			"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 1\n"+
+			"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0 ndv: 11\n"+
+			"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0 ndv: 19\n"+
+			"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0 ndv: 11")
 }
 
 func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
 	// test bucket split
-	feedbacks := []Feedback{newFeedback(0, 1, 1)}
+	feedbacks := []Feedback{newFeedback(0, 1, 1, 1)}
 	for i := 0; i < 100; i++ {
-		feedbacks = append(feedbacks, newFeedback(10, 15, 5))
+		feedbacks = append(feedbacks, newFeedback(10, 15, 5, 5))
 	}
 	q := NewQueryFeedback(0, genHistogram(), 0, false)
 	q.Feedback = feedbacks
+	oldCnts := make([]int64, q.Hist.Len())
+	for i := range q.Hist.Buckets {
+		oldCnts[i] = q.Hist.bucketCount(i)
+	}
+	oldNdvs := make([]int64, q.Hist.Len())
+	for i := range q.Hist.Buckets {
+		oldNdvs[i] = q.Hist.Buckets[i].NDV
+	}
+	log.Warn("in test", zap.Int64s("ndvs", oldNdvs), zap.Int64s("cnts", oldCnts))
 	buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q)
+	ndvs := make([]int64, len(buckets))
+	for i := range buckets {
+		ndvs[i] = buckets[i].Ndv
+	}
+	log.Warn("in test", zap.Int64s("ndvs", ndvs))
 	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
 		"column:0 ndv:0 totColSize:0\n"+
-			"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
-			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
-			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
-			"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
-			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
-			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
+			"num: 1 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+
+			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+
+			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+
+			"num: 5 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 5\n"+
+			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+
+			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0")
 	c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
 	c.Assert(totalCount, Equals, int64(6))
 
 	// test do not split if the bucket count is too small
-	feedbacks = []Feedback{newFeedback(0, 1, 100000)}
+	feedbacks = []Feedback{newFeedback(0, 1, 100000, 1)}
 	for i := 0; i < 100; i++ {
-		feedbacks = append(feedbacks, newFeedback(10, 15, 1))
+		feedbacks = append(feedbacks, newFeedback(10, 15, 1, 1))
 	}
 	q = NewQueryFeedback(0, genHistogram(), 0, false)
 	q.Feedback = feedbacks
 	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
 	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
 		"column:0 ndv:0 totColSize:0\n"+
-			"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
-			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+
-			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
-			"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+
-			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+
-			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0")
+			"num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+
+			"num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+
+			"num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+
+			"num: 1 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 1\n"+
+			"num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+
+			"num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0")
 	c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false})
 	c.Assert(totalCount, Equals, int64(100001))
 
@@ -124,16 +140,17 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
 	h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
 	appendBucket(h, 0, 1000000)
 	h.Buckets[0].Count = 1000000
+	h.Buckets[0].NDV = 1000000
 	feedbacks = feedbacks[:0]
 	for i := 0; i < 100; i++ {
-		feedbacks = append(feedbacks, newFeedback(0, 10, 1))
+		feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1))
 	}
 	q = NewQueryFeedback(0, h, 0, false)
 	q.Feedback = feedbacks
 	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
 	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
 		"column:0 ndv:0 totColSize:0\n"+
-			"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0")
+			"num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0 ndv: 1000000")
 	c.Assert(isNewBuckets, DeepEquals, []bool{false})
 	c.Assert(totalCount, Equals, int64(1000000))
 
@@ -142,15 +159,15 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
 	appendBucket(h, 0, 1000000)
 	feedbacks = feedbacks[:0]
 	for i := 0; i < 100; i++ {
-		feedbacks = append(feedbacks, newFeedback(0, 10, 1))
+		feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1))
 	}
 	q = NewQueryFeedback(0, h, 0, false)
 	q.Feedback = feedbacks
 	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
 	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
 		"column:0 ndv:0 totColSize:0\n"+
-			"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+
-			"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0")
+			"num: 1 lower_bound: 0 upper_bound: 10 repeats: 0 ndv: 1\n"+
+			"num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0 ndv: 0")
 	c.Assert(isNewBuckets, DeepEquals, []bool{true, true})
 	c.Assert(totalCount, Equals, int64(1))
 
@@ -158,14 +175,14 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) {
 	h = NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0)
 	appendBucket(h, 0, 10000)
 	feedbacks = feedbacks[:0]
-	feedbacks = append(feedbacks, newFeedback(0, 4000, 4000))
-	feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000))
+	feedbacks = append(feedbacks, newFeedback(0, 4000, 4000, 4000))
+	feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000, 1000))
 	q = NewQueryFeedback(0, h, 0, false)
 	q.Feedback = feedbacks
 	buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q)
 	c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals,
 		"column:0 ndv:0 totColSize:0\n"+
-			"num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0")
+			"num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0 ndv: 5001")
 	c.Assert(isNewBuckets, DeepEquals, []bool{false})
 	c.Assert(totalCount, Equals, int64(5001))
 }
@@ -176,6 +193,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
 	tests := []struct {
 		points       []int64
 		counts       []int64
+		ndvs         []int64
 		isNewBuckets []bool
 		bucketCount  int
 		result       string
@@ -183,37 +201,43 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) {
 		{
 			points:       []int64{1, 2},
 			counts:       []int64{1},
+			ndvs:         []int64{1},
 			isNewBuckets: []bool{false},
 			bucketCount:  1,
-			result:       "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0",
+			result:       "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1",
 		},
 		{
 			points:       []int64{1, 2, 2, 3, 3, 4},
 			counts:       []int64{100000, 1, 1},
+			ndvs:         []int64{1, 1, 1},
 			isNewBuckets: []bool{false, false, false},
 			bucketCount:  2,
 			result: "column:0 ndv:0 totColSize:0\n" +
-				"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" +
-				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0",
+				"num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1\n" +
+				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2",
 		},
 		// test do not Merge if the result bucket count is too large
 		{
 			points:       []int64{1, 2, 2, 3, 3, 4, 4, 5},
 			counts:       []int64{1, 1, 100000, 100000},
+			ndvs:         []int64{1, 1, 1, 1},
 			isNewBuckets: []bool{false, false, false, false},
 			bucketCount:  3,
 			result: "column:0 ndv:0 totColSize:0\n" +
-				"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" +
-				"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" +
-				"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0",
+				"num: 2 lower_bound: 1 upper_bound: 3 repeats: 0 ndv: 2\n" +
+				"num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0 ndv: 1\n" +
+				"num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0 ndv: 1",
 		},
 	}
 	for _, t := range tests {
+		if len(t.counts) != len(t.ndvs) {
+			c.Assert(false, IsTrue)
+		}
 		bkts := make([]bucket, 0, len(t.counts))
 		totalCount := int64(0)
 		for i := 0; i < len(t.counts); i++ {
 			lower, upper := types.NewIntDatum(t.points[2*i]), types.NewIntDatum(t.points[2*i+1])
-			bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0})
+			bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0, t.ndvs[i]})
 			totalCount += t.counts[i]
 		}
 		defaultBucketCount = t.bucketCount
@@ -232,8 +256,8 @@ func encodeInt(v int64) *types.Datum {
 func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) {
 	hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 0, 0)
 	q := &QueryFeedback{Hist: hist, Tp: PkType}
-	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0})
-	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0})
+	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1})
+	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0, 1})
 	val, err := EncodeFeedback(q)
 	c.Assert(err, IsNil)
 	rq := &QueryFeedback{}
@@ -246,8 +270,8 @@ func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) {
 
 	hist.Tp = types.NewFieldType(mysql.TypeBlob)
 	q = &QueryFeedback{Hist: hist}
-	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0})
-	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0})
+	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1})
+	q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0, 1})
 	val, err = EncodeFeedback(q)
 	c.Assert(err, IsNil)
 	rq = &QueryFeedback{}
@@ -268,6 +292,9 @@ func (q *QueryFeedback) Equal(rq *QueryFeedback) bool {
 		if fb.Count != rfb.Count {
 			return false
 		}
+		if fb.Ndv != rfb.Ndv {
+			return false
+		}
 		if fb.Lower.Kind() == types.KindInt64 {
 			if fb.Lower.GetInt64() != rfb.Lower.GetInt64() {
 				return false
diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go
index 5fc609d82c488..fd43de6188ef4 100644
--- a/statistics/handle/handle.go
+++ b/statistics/handle/handle.go
@@ -696,7 +696,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
 		if err != nil {
 			return
 		}
-		sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes()))
+		sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound, ndv) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X', %d)", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes(), hg.Buckets[i].NDV))
 	}
 	if isAnalyzed == 1 && len(lastAnalyzePos) > 0 {
 		sqls = append(sqls, fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID))
@@ -729,7 +729,7 @@ func (h *Handle) SaveMetaToStorage(tableID, count, modifyCount int64) (err error
 }
 
 func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totColSize int64, corr float64) (_ *statistics.Histogram, err error) {
-	selSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d order by bucket_id", tableID, isIndex, colID)
+	selSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d order by bucket_id", tableID, isIndex, colID)
 	rows, fields, err := reader.read(selSQL)
 	if err != nil {
 		return nil, errors.Trace(err)
@@ -759,7 +759,7 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID
 			}
 		}
 		totalCount += count
-		hg.AppendBucket(&lowerBound, &upperBound, totalCount, repeats)
+		hg.AppendBucketWithNDV(&lowerBound, &upperBound, totalCount, repeats, rows[i].GetInt64(4))
 	}
 	hg.PreCalculateScalar()
 	return hg, nil
diff --git a/statistics/handle/update.go b/statistics/handle/update.go
index 368a3c6adc682..5d0de1ea7079e 100644
--- a/statistics/handle/update.go
+++ b/statistics/handle/update.go
@@ -161,9 +161,9 @@ func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSi
 
 var (
 	// MinLogScanCount is the minimum scan count for a feedback to be logged.
-	MinLogScanCount = int64(1000)
+	MinLogScanCount = int64(1)
 	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
-	MinLogErrorRate = 0.5
+	MinLogErrorRate = 0.0
 )
 
 // StoreQueryFeedback merges the feedback into stats collector.
@@ -383,8 +383,17 @@ func (h *Handle) DumpStatsFeedbackToKV() error {
 				err = h.DumpFeedbackToKV(fb)
 			} else {
 				t, ok := h.statsCache.Load().(statsCache).tables[fb.PhysicalID]
-				if ok {
+				if !ok {
+					continue
+				}
+				idx, ok := t.Indices[fb.Hist.ID]
+				if !ok {
+					continue
+				}
+				if idx.StatsVer == statistics.Version1 {
 					err = h.DumpFeedbackForIndex(fb, t)
+				} else {
+					err = h.DumpFeedbackToKV(fb)
 				}
 			}
 			if err != nil {
@@ -427,6 +436,7 @@ func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error {
 // feedback locally on this tidb-server, so it could be used more timely.
 func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 	h.sweepList()
+	logutil.BgLogger().Warn("local feedback update")
 	for _, fbs := range h.feedback.Feedbacks {
 		for _, fb := range fbs {
 			h.mu.Lock()
@@ -442,6 +452,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 				if !ok || idx.Histogram.Len() == 0 {
 					continue
 				}
+				logutil.BgLogger().Warn("local feedback update index")
 				newIdx := *idx
 				eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
 				newIdx.CMSketch = statistics.UpdateCMSketch(idx.CMSketch, eqFB)
@@ -1068,6 +1079,7 @@ func (h *Handle) DumpFeedbackForIndex(q *statistics.QueryFeedback, t *statistics
 	return errors.Trace(h.DumpFeedbackToKV(q))
 }
 
+
 // minAdjustFactor is the minimum adjust factor of each index feedback.
 // We use it to avoid adjusting too much when the assumption of independence failed.
 const minAdjustFactor = 0.7
diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go
index 35744b998d490..9b9bacfb7b8ef 100644
--- a/statistics/handle/update_test.go
+++ b/statistics/handle/update_test.go
@@ -760,25 +760,25 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5 order by a desc",
 			hist: "column:1 ndv:4 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" +
-				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n" +
-				"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1",
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" +
+				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2\n" +
+				"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 1",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
-			hist: "index:1 ndv:2\n" +
-				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" +
-				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1",
+			hist: "index:1 ndv:3\n" +
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
-			hist: "index:1 ndv:2\n" +
-				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" +
-				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1",
+			hist: "index:1 ndv:3\n" +
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1",
 			idxCols: 1,
 		},
 	}
@@ -880,22 +880,22 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5",
 			hist: "column:1 ndv:2 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" +
-				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0",
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" +
+				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 1",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0",
+				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0",
+				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1",
 			idxCols: 1,
 		},
 	}
@@ -1016,10 +1016,10 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) {
 	h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
 	tbl := h.GetTableStats(tblInfo)
 
-	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
-		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+
-		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+
-		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0")
+	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+
+		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+
+		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
+		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
 	sc := &stmtctx.StatementContext{TimeZone: time.Local}
 	low, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5))
 	c.Assert(err, IsNil)
@@ -1027,8 +1027,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) {
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint64(2))
 
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+
-		"num: 2 lower_bound: -inf upper_bound: 5 repeats: 0\n"+
-		"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1")
+		"num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 1\n"+
+		"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1")
 
 	// Test that it won't cause panic after update.
 	testKit.MustQuery("select * from t use index(idx) where b > 0")
@@ -1072,10 +1072,10 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
 	pid := tblInfo.Partition.Definitions[0].ID
 	tbl := h.GetPartitionStats(tblInfo, pid)
 
-	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
-		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+
-		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+
-		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0")
+	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+
+		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+
+		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
+		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
 }
 
 type logHook struct {
@@ -1148,21 +1148,21 @@ func (s *testStatsSuite) TestLogDetailedInfo(c *C) {
 	}{
 		{
 			sql: "select * from t where t.a <= 15",
-			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" +
-				"[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}",
+			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" +
+				"[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}",
 		},
 		{
 			sql: "select * from t use index(idx) where t.b <= 15",
-			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" +
-				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1}",
+			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" +
+				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 4}",
 		},
 		{
 			sql:    "select b from t use index(idx_ba) where b = 1 and a <= 5",
-			result: "[stats-feedback] test.t, index=idx_ba, actual=1, equality=1, expected equality=1, range=range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}",
+			result: "[stats-feedback] test.t, index=idx_ba, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}",
 		},
 		{
 			sql:    "select b from t use index(idx_bc) where b = 1 and c <= 5",
-			result: "[stats-feedback] test.t, index=idx_bc, actual=1, equality=1, expected equality=1, range=[-inf,6], pseudo count=7",
+			result: "[stats-feedback] test.t, index=idx_bc, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}",
 		},
 		{
 			sql:    "select b from t use index(idx_ba) where b = 1",
@@ -1585,25 +1585,25 @@ func (s *testStatsSuite) TestFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50 or (a > 130 and a < 140)",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
 			colID: 1,
 		},
 		{
 			sql: "select * from t where a >= 10",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0",
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
+				"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 14",
 			colID: 1,
 		},
 		{
 			sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))",
 			hist: "column:2 ndv:20 totColSize:30\n" +
-				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
-				"num: 7 lower_bound: 14 upper_bound: 51 repeats: 0",
+				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 8\n" +
+				"num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 8\n" +
+				"num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 7",
 			colID: 2,
 		},
 	}
@@ -1665,33 +1665,33 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
 			tblName: "t",
 		},
 		{
 			sql: "select count(*) from t",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 255 repeats: 0",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 255 repeats: 0 ndv: 0",
 			tblName: "t",
 		},
 		{
 			sql: "select * from t1 where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
 			tblName: "t1",
 		},
 		{
 			sql: "select count(*) from t1",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
-				"num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0 ndv: 0",
 			tblName: "t1",
 		},
 	}
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 70bf8960597ec..1d15e15f95d6a 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -79,6 +79,7 @@ type Histogram struct {
 type Bucket struct {
 	Count  int64
 	Repeat int64
+	NDV    int64
 }
 
 type scalar struct {
@@ -201,7 +202,14 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 {
 
 // AppendBucket appends a bucket into `hg`.
 func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) {
-	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat})
+	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 1})
+	hg.Bounds.AppendDatum(0, lower)
+	hg.Bounds.AppendDatum(0, upper)
+}
+
+// AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`.
+func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64) {
+	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: ndv})
 	hg.Bounds.AppendDatum(0, lower)
 	hg.Bounds.AppendDatum(0, upper)
 }
@@ -210,7 +218,9 @@ func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) {
 	len := hg.Len()
 	hg.Bounds.TruncateTo(2*len - 1)
 	hg.Bounds.AppendDatum(0, upper)
-	hg.Buckets[len-1] = Bucket{Count: count, Repeat: repeat}
+	hg.Buckets[len-1].Count = count
+	hg.Buckets[len-1].Repeat = repeat
+	hg.Buckets[len-1].NDV++
 }
 
 // DecodeTo decodes the histogram bucket values into `Tp`.
@@ -261,9 +271,13 @@ func HistogramEqual(a, b *Histogram, ignoreID bool) bool {
 }
 
 // constants for stats version. These const can be used for solving compatibility issue.
+// If the version number is 0, it means the most original statistics.
 const (
-	CurStatsVersion = Version1
-	Version1        = 1
+	CurStatsVersion = Version2
+	// Version1 added CMSketch.
+	Version1 = 1
+	// Version2 added bucket NDV for index's full analyze.
+	Version2 = 2
 )
 
 // AnalyzeFlag is set when the statistics comes from analyze and has not been modified by feedback.
@@ -302,7 +316,7 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string {
 	terror.Log(errors.Trace(err))
 	lowerVal, err := ValueToString(hg.GetLower(bktID), idxCols)
 	terror.Log(errors.Trace(err))
-	return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat)
+	return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d ndv: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat, hg.Buckets[bktID].NDV)
 }
 
 // ToString gets the string representation for the histogram.
@@ -327,6 +341,9 @@ func (hg *Histogram) equalRowCount(value types.Datum) float64 {
 		if match {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
+		if hg.Buckets[index/2].NDV > 0 {
+			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
+		}
 		return hg.notNullCount() / float64(hg.NDV)
 	}
 	if match {
@@ -334,6 +351,9 @@ func (hg *Histogram) equalRowCount(value types.Datum) float64 {
 		if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
+		if hg.Buckets[index/2].NDV > 0 {
+			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
+		}
 		return hg.notNullCount() / float64(hg.NDV)
 	}
 	return 0
@@ -410,7 +430,9 @@ func (hg *Histogram) mergeBuckets(bucketIdx int) {
 	curBuck := 0
 	c := chunk.NewChunkWithCapacity([]*types.FieldType{hg.Tp}, bucketIdx)
 	for i := 0; i+1 <= bucketIdx; i += 2 {
-		hg.Buckets[curBuck] = hg.Buckets[i+1]
+		hg.Buckets[curBuck].NDV = hg.Buckets[i+1].NDV + hg.Buckets[i].NDV
+		hg.Buckets[curBuck].Count = hg.Buckets[i+1].Count
+		hg.Buckets[curBuck].Repeat = hg.Buckets[i+1].Repeat
 		c.AppendDatum(0, hg.GetLower(i))
 		c.AppendDatum(0, hg.GetUpper(i+1))
 		curBuck++
@@ -578,6 +600,7 @@ func HistogramToProto(hg *Histogram) *tipb.Histogram {
 			LowerBound: hg.GetLower(i).GetBytes(),
 			UpperBound: hg.GetUpper(i).GetBytes(),
 			Repeats:    hg.Buckets[i].Repeat,
+			Ndv:        &hg.Buckets[i].NDV,
 		}
 		protoHg.Buckets = append(protoHg.Buckets, bkt)
 	}
@@ -592,7 +615,11 @@ func HistogramFromProto(protoHg *tipb.Histogram) *Histogram {
 	hg := NewHistogram(0, protoHg.Ndv, 0, 0, tp, len(protoHg.Buckets), 0)
 	for _, bucket := range protoHg.Buckets {
 		lower, upper := types.NewBytesDatum(bucket.LowerBound), types.NewBytesDatum(bucket.UpperBound)
-		hg.AppendBucket(&lower, &upper, bucket.Count, bucket.Repeats)
+		if bucket.Ndv != nil {
+			hg.AppendBucketWithNDV(&lower, &upper, bucket.Count, bucket.Repeats, *bucket.Ndv)
+		} else {
+			hg.AppendBucket(&lower, &upper, bucket.Count, bucket.Repeats)
+		}
 	}
 	return hg
 }
@@ -626,6 +653,7 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram,
 	offset := int64(0)
 	if cmp == 0 {
 		lh.NDV--
+		lh.Buckets[len(lh.Buckets)-1].NDV--
 		lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat)
 		offset = rh.Buckets[0].Count
 		rh.popFirstBucket()
@@ -916,20 +944,20 @@ func (idx *Index) MemoryUsage() (sum int64) {
 
 var nullKeyBytes, _ = codec.EncodeKey(nil, nil, types.NewDatum(nil))
 
-func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) (float64, error) {
+func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 {
 	if len(idx.Info.Columns) == 1 {
 		if bytes.Equal(b, nullKeyBytes) {
-			return float64(idx.NullCount), nil
+			return float64(idx.NullCount)
 		}
 	}
 	val := types.NewBytesDatum(b)
 	if idx.NDV > 0 && idx.outOfRange(val) {
-		return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount(), nil
+		return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
 	}
-	if idx.CMSketch != nil {
-		return float64(idx.CMSketch.QueryBytes(b)), nil
+	if idx.CMSketch != nil && (len(idx.Histogram.Buckets) == 0 || idx.Histogram.Buckets[0].NDV == 0) {
+		return float64(idx.CMSketch.QueryBytes(b))
 	}
-	return idx.Histogram.equalRowCount(val), nil
+	return idx.Histogram.equalRowCount(val)
 }
 
 // GetRowCount returns the row count of the given ranges.
@@ -957,10 +985,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range
 					totalCount += 1
 					continue
 				}
-				count, err := idx.equalRowCount(sc, lb, modifyCount)
-				if err != nil {
-					return 0, err
-				}
+				count := idx.equalRowCount(lb, modifyCount)
 				totalCount += count
 				continue
 			}
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 4fbf65ffffc50..aec115c5599a3 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -675,5 +675,5 @@ func (s *testStatisticsSuite) TestIndexRanges(c *C) {
 	ran[0].HighVal[0] = types.NewIntDatum(1000)
 	count, err = tbl.GetRowCountByIndexRanges(sc, 0, ran)
 	c.Assert(err, IsNil)
-	c.Assert(int(count), Equals, 0)
+	c.Assert(int(count), Equals, 3)
 }
diff --git a/statistics/table.go b/statistics/table.go
index e080e755b1061..ddfc90fe64ca5 100644
--- a/statistics/table.go
+++ b/statistics/table.go
@@ -489,6 +489,9 @@ func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, usedCols
 		}
 		return outOfRangeEQSelectivity(ndv, coll.ModifyCount, int64(idx.TotalRowCount()))
 	}
+	if coverAll && len(idx.Histogram.Buckets) > 0 && idx.Histogram.Buckets[0].NDV > 0 {
+		return idx.Histogram.equalRowCount(val)
+	}
 	return float64(idx.CMSketch.QueryBytes(bytes)) / float64(idx.TotalRowCount())
 }
 
diff --git a/statistics/testdata/stats_suite_out.json b/statistics/testdata/stats_suite_out.json
index cd4fa051551d8..b92788b9c0b69 100644
--- a/statistics/testdata/stats_suite_out.json
+++ b/statistics/testdata/stats_suite_out.json
@@ -60,8 +60,8 @@
     "Name": "TestDiscreteDistribution",
     "Cases": [
       [
-        "IndexReader_6 0.00 root  index:IndexRangeScan_5",
-        "└─IndexRangeScan_5 0.00 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false"
+        "IndexReader_6 1.02 root  index:IndexRangeScan_5",
+        "└─IndexRangeScan_5 1.02 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false"
       ]
     ]
   },
@@ -92,8 +92,8 @@
     "Name": "TestCollationColumnEstimate",
     "Cases": [
       [
-        "test t  a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A",
-        "test t  a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B"
+        "test t  a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 1",
+        "test t  a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 1"
       ],
       [
         "TableReader_7 2.00 root  data:Selection_6",
diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go
index 4342ab0d7388b..4b5aa22f9dccc 100644
--- a/store/mockstore/unistore/cophandler/closure_exec.go
+++ b/store/mockstore/unistore/cophandler/closure_exec.go
@@ -141,6 +141,10 @@ func newClosureExecutor(dagCtx *dagContext, dagReq *tipb.DAGRequest) (*closureEx
 		e.unique = idxScan.GetUnique()
 		e.scanCtx.desc = idxScan.Desc
 		e.initIdxScanCtx(idxScan)
+		if dagReq.GetCollectRangeCounts() {
+			e.idxScanCtx.collectNdv = true
+			e.idxScanCtx.previousVals = make([][]byte, e.idxScanCtx.columnLen)
+		}
 	default:
 		panic(fmt.Sprintf("unknown first executor type %s", executors[0].Tp))
 	}
@@ -150,6 +154,7 @@ func newClosureExecutor(dagCtx *dagContext, dagReq *tipb.DAGRequest) (*closureEx
 	}
 	if dagReq.GetCollectRangeCounts() {
 		e.counts = make([]int64, len(ranges))
+		e.ndvs = make([]int64, len(ranges))
 	}
 	e.kvRanges = ranges
 	e.scanCtx.chk = chunk.NewChunkWithCapacity(e.fieldTps, 32)
@@ -315,6 +320,8 @@ type closureExecutor struct {
 	processor closureProcessor
 
 	counts []int64
+	ndvs   []int64
+	curNdv int64
 }
 
 type closureProcessor interface {
@@ -339,6 +346,9 @@ type idxScanCtx struct {
 	columnLen        int
 	colInfos         []rowcodec.ColInfo
 	primaryColumnIds []int64
+
+	collectNdv   bool
+	previousVals [][]byte
 }
 
 type aggCtx struct {
@@ -362,6 +372,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) {
 	}
 	dbReader := e.dbReader
 	for i, ran := range e.kvRanges {
+		e.curNdv = 0
 		if e.isPointGetRange(ran) {
 			val, err := dbReader.Get(ran.StartKey, e.startTS)
 			if err != nil {
@@ -372,6 +383,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) {
 			}
 			if e.counts != nil {
 				e.counts[i]++
+				e.ndvs[i] = e.curNdv
 			}
 			err = e.processor.Process(ran.StartKey, val)
 			if err != nil {
@@ -387,6 +399,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) {
 			delta := int64(e.rowCount - oldCnt)
 			if e.counts != nil {
 				e.counts[i] += delta
+				e.ndvs[i] = e.curNdv
 			}
 			if err != nil {
 				return nil, errors.Trace(err)
@@ -591,6 +604,7 @@ func (e *closureExecutor) tableScanProcessCore(key, value []byte) error {
 	if err != nil {
 		return errors.Trace(err)
 	}
+	e.curNdv++
 	return nil
 }
 
@@ -619,6 +633,15 @@ func (e *indexScanProcessor) Finish() error {
 	return e.scanFinish()
 }
 
+func (isc *idxScanCtx) checkVal(curVals [][]byte) bool {
+	for i := 0; i < isc.columnLen; i++ {
+		if bytes.Compare(isc.previousVals[i], curVals[i]) != 0 {
+			return false
+		}
+	}
+	return true
+}
+
 func (e *closureExecutor) indexScanProcessCore(key, value []byte) error {
 	handleStatus := mapPkStatusToHandleStatus(e.idxScanCtx.pkStatus)
 	restoredCols := make([]rowcodec.ColInfo, 0, len(e.idxScanCtx.colInfos))
@@ -631,6 +654,14 @@ func (e *closureExecutor) indexScanProcessCore(key, value []byte) error {
 	if err != nil {
 		return err
 	}
+	if e.idxScanCtx.collectNdv {
+		if len(e.idxScanCtx.previousVals[0]) == 0 || !e.idxScanCtx.checkVal(values) {
+			e.curNdv++
+			for i := 0; i < e.idxScanCtx.columnLen; i++ {
+				e.idxScanCtx.previousVals[i] = append(e.idxScanCtx.previousVals[i][:0], values[i]...)
+			}
+		}
+	}
 	chk := e.scanCtx.chk
 	decoder := codec.NewDecoder(chk, e.sc.TimeZone)
 	for i, colVal := range values {
diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go
index 41453f85a34b6..2dc6528374359 100644
--- a/store/mockstore/unistore/cophandler/cop_handler.go
+++ b/store/mockstore/unistore/cophandler/cop_handler.go
@@ -74,10 +74,10 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt
 	}
 	closureExec, err := buildClosureExecutor(dagCtx, dagReq)
 	if err != nil {
-		return buildResp(nil, nil, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
+		return buildResp(nil, nil, nil, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
 	}
 	chunks, err := closureExec.execute()
-	return buildResp(chunks, closureExec.counts, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
+	return buildResp(chunks, closureExec.counts, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
 }
 
 func buildDAG(reader *dbreader.DBReader, lockStore *lockstore.MemStore, req *coprocessor.Request) (*dagContext, *tipb.DAGRequest, error) {
@@ -268,12 +268,13 @@ func (e *ErrLocked) Error() string {
 	return fmt.Sprintf("key is locked, key: %q, Type: %v, primary: %q, startTS: %v", e.Key, e.LockType, e.Primary, e.StartTS)
 }
 
-func buildResp(chunks []tipb.Chunk, counts []int64, dagReq *tipb.DAGRequest, err error, warnings []stmtctx.SQLWarn, dur time.Duration) *coprocessor.Response {
+func buildResp(chunks []tipb.Chunk, counts, ndvs []int64, dagReq *tipb.DAGRequest, err error, warnings []stmtctx.SQLWarn, dur time.Duration) *coprocessor.Response {
 	resp := &coprocessor.Response{}
 	selResp := &tipb.SelectResponse{
 		Error:        toPBError(err),
 		Chunks:       chunks,
 		OutputCounts: counts,
+		Ndvs:         ndvs,
 	}
 	if dagReq.CollectExecutionSummaries != nil && *dagReq.CollectExecutionSummaries {
 		execSummary := make([]*tipb.ExecutorExecutionSummary, len(dagReq.Executors))

From b4bdd723fa460daad4bb283978f3e530d1b5f939 Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Mon, 26 Oct 2020 20:08:27 +0800
Subject: [PATCH 2/9] address comments

---
 executor/analyze.go            | 8 +++++++-
 statistics/handle/dump.go      | 4 ++--
 statistics/handle/dump_test.go | 2 +-
 statistics/handle/handle.go    | 6 +++---
 statistics/handle/update.go    | 8 +++++---
 statistics/histogram.go        | 3 +++
 6 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/executor/analyze.go b/executor/analyze.go
index a23f7aa26a6bd..0f6d7cfc566e9 100755
--- a/executor/analyze.go
+++ b/executor/analyze.go
@@ -112,7 +112,7 @@ func (e *AnalyzeExec) Next(ctx context.Context, req *chunk.Chunk) error {
 			continue
 		}
 		for i, hg := range result.Hist {
-			err1 := statsHandle.SaveStatsToStorage(result.PhysicalTableID, result.Count, result.IsIndex, hg, result.Cms[i], 1)
+			err1 := statsHandle.SaveStatsToStorage(result.PhysicalTableID, result.Count, result.IsIndex, hg, result.Cms[i], statistics.CurStatsVersion, 1)
 			if err1 != nil {
 				err = err1
 				logutil.Logger(ctx).Error("save stats to storage failed", zap.Error(err))
@@ -239,6 +239,7 @@ func analyzeIndexPushdown(idxExec *AnalyzeIndexExec) analyzeResult {
 		Cms:             []*statistics.CMSketch{cms},
 		IsIndex:         1,
 		job:             idxExec.job,
+		StatsVer:        statistics.CurStatsVersion,
 	}
 	result.Count = hist.NullCount
 	if hist.Len() > 0 {
@@ -401,6 +402,7 @@ func analyzeColumnsPushdown(colExec *AnalyzeColumnsExec) analyzeResult {
 		Cms:             cms,
 		ExtStats:        extStats,
 		job:             colExec.job,
+		StatsVer:        statistics.Version0,
 	}
 	hist := hists[0]
 	result.Count = hist.NullCount
@@ -598,6 +600,7 @@ func analyzeFastExec(exec *AnalyzeFastExec) []analyzeResult {
 				IsIndex:         1,
 				Count:           hists[i].NullCount,
 				job:             exec.job,
+				StatsVer:        statistics.Version1,
 			}
 			if hists[i].Len() > 0 {
 				idxResult.Count += hists[i].Buckets[hists[i].Len()-1].Count
@@ -1220,6 +1223,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
 		Cms:             []*statistics.CMSketch{cms},
 		IsIndex:         1,
 		job:             idxExec.job,
+		StatsVer:        statistics.Version2,
 	}
 	result.Count = hist.NullCount
 	if hist.Len() > 0 {
@@ -1257,6 +1261,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult {
 		Hist:            []*statistics.Histogram{hist},
 		Cms:             []*statistics.CMSketch{nil},
 		job:             colExec.job,
+		StatsVer:        statistics.Version0,
 	}
 	if hist.Len() > 0 {
 		result.Count += hist.Buckets[hist.Len()-1].Count
@@ -1275,4 +1280,5 @@ type analyzeResult struct {
 	IsIndex         int
 	Err             error
 	job             *statistics.AnalyzeJob
+	StatsVer        int64
 }
diff --git a/statistics/handle/dump.go b/statistics/handle/dump.go
index 16295569d76c0..8d7d3f18c2f71 100644
--- a/statistics/handle/dump.go
+++ b/statistics/handle/dump.go
@@ -156,13 +156,13 @@ func (h *Handle) loadStatsFromJSON(tableInfo *model.TableInfo, physicalID int64,
 	}
 
 	for _, col := range tbl.Columns {
-		err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1)
+		err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1, 0)
 		if err != nil {
 			return errors.Trace(err)
 		}
 	}
 	for _, idx := range tbl.Indices {
-		err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1)
+		err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, idx.StatsVer, 1)
 		if err != nil {
 			return errors.Trace(err)
 		}
diff --git a/statistics/handle/dump_test.go b/statistics/handle/dump_test.go
index 2d4dcc52ff637..0f9fef1b7bdee 100644
--- a/statistics/handle/dump_test.go
+++ b/statistics/handle/dump_test.go
@@ -150,7 +150,7 @@ func (s *testStatsSuite) TestDumpCMSketchWithTopN(c *C) {
 	cms, _, _ := statistics.NewCMSketchWithTopN(5, 2048, fakeData, 20, 100)
 
 	stat := h.GetTableStats(tableInfo)
-	err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, 1)
+	err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, statistics.CurStatsVersion, 1)
 	c.Assert(err, IsNil)
 	c.Assert(h.Update(is), IsNil)
 
diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go
index fd43de6188ef4..0f010227e55f9 100644
--- a/statistics/handle/handle.go
+++ b/statistics/handle/handle.go
@@ -635,7 +635,7 @@ func (h *Handle) extendedStatsFromStorage(reader *statsReader, table *statistics
 }
 
 // SaveStatsToStorage saves the stats to storage.
-func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, isAnalyzed int64) (err error) {
+func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, statsVer int64, isAnalyzed int64) (err error) {
 	h.mu.Lock()
 	defer h.mu.Unlock()
 	ctx := context.TODO()
@@ -673,8 +673,8 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg
 	if isAnalyzed == 1 {
 		flag = statistics.AnalyzeFlag
 	}
-	sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)",
-		tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statistics.CurStatsVersion, flag, hg.Correlation))
+	sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, statsVer, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)",
+		tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statsVer, flag, hg.Correlation))
 	sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID))
 	sc := h.mu.ctx.GetSessionVars().StmtCtx
 	var lastAnalyzePos []byte
diff --git a/statistics/handle/update.go b/statistics/handle/update.go
index 5d0de1ea7079e..a2da21eb60b5b 100644
--- a/statistics/handle/update.go
+++ b/statistics/handle/update.go
@@ -564,9 +564,11 @@ func (h *Handle) handleSingleHistogramUpdate(is infoschema.InfoSchema, rows []ch
 	}
 	var cms *statistics.CMSketch
 	var hist *statistics.Histogram
+	var statsVer int64 = 0
 	if isIndex == 1 {
 		idx, ok := tbl.Indices[histID]
 		if ok && idx.Histogram.Len() > 0 {
+			statsVer = idx.StatsVer
 			idxHist := idx.Histogram
 			hist = &idxHist
 			cms = idx.CMSketch.Copy()
@@ -589,7 +591,7 @@ func (h *Handle) handleSingleHistogramUpdate(is infoschema.InfoSchema, rows []ch
 			logutil.BgLogger().Debug("decode feedback failed", zap.Error(err))
 		}
 	}
-	err = h.dumpStatsUpdateToKV(physicalTableID, isIndex, q, hist, cms)
+	err = h.dumpStatsUpdateToKV(physicalTableID, isIndex, q, hist, cms, statsVer)
 	return errors.Trace(err)
 }
 
@@ -608,9 +610,9 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error {
 	return nil
 }
 
-func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch) error {
+func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, statsVer int64) error {
 	hist = statistics.UpdateHistogram(hist, q)
-	err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, 0)
+	err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, statsVer, 0)
 	metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
 	return errors.Trace(err)
 }
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 1d15e15f95d6a..c54231d1fff30 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -274,6 +274,9 @@ func HistogramEqual(a, b *Histogram, ignoreID bool) bool {
 // If the version number is 0, it means the most original statistics.
 const (
 	CurStatsVersion = Version2
+
+	// Version0 is the most early statistics only histogram.
+	Version0 = 0
 	// Version1 added CMSketch.
 	Version1 = 1
 	// Version2 added bucket NDV for index's full analyze.

From 7e6638e0584e950141abd68c91c652a59e767a43 Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Tue, 15 Dec 2020 03:19:40 +0800
Subject: [PATCH 3/9] add version control and make feedback collect work again

---
 distsql/select_result.go                      |  2 +-
 executor/analyze.go                           | 12 +++--
 planner/core/planbuilder.go                   |  2 +-
 statistics/builder.go                         | 20 +++++--
 statistics/feedback.go                        |  2 +-
 statistics/feedback_test.go                   |  2 +-
 statistics/handle/update.go                   |  6 +--
 statistics/histogram.go                       | 52 +++++++++++++++++--
 statistics/sample_test.go                     |  2 +-
 statistics/statistics_test.go                 |  6 +--
 statistics/table.go                           |  2 +-
 store/mockstore/mocktikv/analyze.go           |  4 +-
 .../mockstore/unistore/cophandler/analyze.go  | 14 +++--
 .../unistore/cophandler/closure_exec.go       |  2 +-
 14 files changed, 96 insertions(+), 32 deletions(-)

diff --git a/distsql/select_result.go b/distsql/select_result.go
index b2e3ae327dd24..7208928732f60 100644
--- a/distsql/select_result.go
+++ b/distsql/select_result.go
@@ -147,7 +147,7 @@ func (r *selectResult) fetchResp(ctx context.Context) error {
 			sc.AppendWarning(dbterror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg))
 		}
 		if r.feedback != nil {
-			logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs))
+			// logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs))
 			r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs)
 		}
 		r.partialCount++
diff --git a/executor/analyze.go b/executor/analyze.go
index ece7a77ec26b6..b62135178a05a 100644
--- a/executor/analyze.go
+++ b/executor/analyze.go
@@ -333,6 +333,10 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee
 		cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth]))
 		topn = statistics.NewTopN(int(e.opts[ast.AnalyzeOptNumTopN]))
 	}
+	statsVer := statistics.Version1
+	if e.analyzePB.IdxReq.Version != nil {
+		statsVer = int(*e.analyzePB.IdxReq.Version)
+	}
 	for {
 		data, err := result.NextRaw(context.TODO())
 		if err != nil {
@@ -348,7 +352,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee
 		}
 		respHist := statistics.HistogramFromProto(resp.Hist)
 		e.job.Update(int64(respHist.TotalRowCount()))
-		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
+		hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]), statsVer)
 		if err != nil {
 			return nil, nil, nil, err
 		}
@@ -535,7 +539,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range, needExtStats boo
 		if hasPkHist(e.handleCols) {
 			respHist := statistics.HistogramFromProto(resp.PkHist)
 			rowCount = int64(respHist.TotalRowCount())
-			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]))
+			pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1)
 			if err != nil {
 				return nil, nil, nil, nil, err
 			}
@@ -1212,7 +1216,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
-	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets]))
+	hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1)
 	if err != nil {
 		return analyzeResult{Err: err, job: idxExec.job}
 	}
@@ -1263,7 +1267,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
 	hist := hists[0]
-	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets]))
+	hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1)
 	if err != nil {
 		return analyzeResult{Err: err, job: colExec.job}
 	}
diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go
index 4065fec337b9b..09bd268afafc3 100644
--- a/planner/core/planbuilder.go
+++ b/planner/core/planbuilder.go
@@ -3441,7 +3441,7 @@ func buildShowSchema(s *ast.ShowStmt, isView bool, isSequence bool) (schema *exp
 		names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Bucket_id", "Count",
 			"Repeats", "Lower_Bound", "Upper_Bound", "Ndv"}
 		ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeLonglong,
-			mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar}
+			mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong}
 	case ast.ShowStatsTopN:
 		names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Value", "Count"}
 		ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeVarchar, mysql.TypeLonglong}
diff --git a/statistics/builder.go b/statistics/builder.go
index 8168c64acfb24..ce57023d80198 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -29,15 +29,17 @@ type SortedBuilder struct {
 	bucketIdx       int64
 	Count           int64
 	hist            *Histogram
+	statsVer        int
 }
 
 // NewSortedBuilder creates a new SortedBuilder.
-func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder {
+func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType, statsVer int) *SortedBuilder {
 	return &SortedBuilder{
 		sc:              sc,
 		numBuckets:      numBuckets,
 		valuesPerBucket: 1,
 		hist:            NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0),
+		statsVer:        statsVer,
 	}
 }
 
@@ -49,8 +51,16 @@ func (b *SortedBuilder) Hist() *Histogram {
 // Iterate updates the histogram incrementally.
 func (b *SortedBuilder) Iterate(data types.Datum) error {
 	b.Count++
+	appendBucket := b.hist.AppendBucket
+	updateLastBucket := b.hist.updateLastBucket
+	if b.statsVer == Version2 {
+		updateLastBucket = b.hist.updateLastBucketV2
+		appendBucket = func(lower, upper *types.Datum, count, repeat int64) {
+			b.hist.AppendBucketWithNDV(lower, upper, count, repeat, 1)
+		}
+	}
 	if b.Count == 1 {
-		b.hist.AppendBucketWithNDV(&data, &data, 1, 1, 1)
+		appendBucket(&data, &data, 1, 1)
 		b.hist.NDV = 1
 		return nil
 	}
@@ -66,7 +76,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error {
 		b.hist.Buckets[b.bucketIdx].Repeat++
 	} else if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket {
 		// The bucket still have room to store a new item, update the bucket.
-		b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
+		updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
 		b.hist.NDV++
 	} else {
 		// All buckets are full, we should merge buckets.
@@ -82,11 +92,11 @@ func (b *SortedBuilder) Iterate(data types.Datum) error {
 		}
 		// We may merge buckets, so we should check it again.
 		if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket {
-			b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
+			updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
 		} else {
 			b.lastNumber = b.hist.Buckets[b.bucketIdx].Count
 			b.bucketIdx++
-			b.hist.AppendBucketWithNDV(&data, &data, b.lastNumber+1, 1, 1)
+			appendBucket(&data, &data, b.lastNumber+1, 1)
 		}
 		b.hist.NDV++
 	}
diff --git a/statistics/feedback.go b/statistics/feedback.go
index bc7ee2ae1ac1f..1f042b85d41cc 100644
--- a/statistics/feedback.go
+++ b/statistics/feedback.go
@@ -743,7 +743,7 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6
 }
 
 // UpdateHistogram updates the histogram according buckets.
-func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram {
+func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram {
 	buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
 	ndvs := make([]int64, len(buckets))
 	for i := range buckets {
diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go
index 345f1435fe46d..12b87ffa57c3f 100644
--- a/statistics/feedback_test.go
+++ b/statistics/feedback_test.go
@@ -73,7 +73,7 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
 	originBucketCount := defaultBucketCount
 	defaultBucketCount = 7
 	defer func() { defaultBucketCount = originBucketCount }()
-	c.Assert(UpdateHistogram(q.Hist, q).ToString(0), Equals,
+	c.Assert(UpdateHistogram(q.Hist, q, Version2).ToString(0), Equals,
 		"column:0 ndv:10053 totColSize:0\n"+
 			"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+
 			"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 2\n"+
diff --git a/statistics/handle/update.go b/statistics/handle/update.go
index c0dc411687857..d06f0b8268d28 100644
--- a/statistics/handle/update.go
+++ b/statistics/handle/update.go
@@ -573,7 +573,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 					ranFB = statistics.CleanRangeFeedbackByTopN(ranFB, idx.TopN)
 				}
 				newIdx.CMSketch, newIdx.TopN = statistics.UpdateCMSketchAndTopN(idx.CMSketch, idx.TopN, eqFB)
-				newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB})
+				newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB}, int(idx.StatsVer))
 				newIdx.Histogram.PreCalculateScalar()
 				newIdx.Flag = statistics.ResetAnalyzeFlag(newIdx.Flag)
 				newTblStats.Indices[fb.Hist.ID] = &newIdx
@@ -587,7 +587,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 				_, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
 				newFB := &statistics.QueryFeedback{Feedback: ranFB}
 				newFB = newFB.DecodeIntValues()
-				newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB)
+				newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB, statistics.Version1)
 				newCol.Flag = statistics.ResetAnalyzeFlag(newCol.Flag)
 				newTblStats.Columns[fb.Hist.ID] = &newCol
 			}
@@ -763,7 +763,7 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error {
 }
 
 func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int64) error {
-	hist = statistics.UpdateHistogram(hist, q)
+	hist = statistics.UpdateHistogram(hist, q, int(statsVersion))
 	err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, topN, int(statsVersion), 0)
 	metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
 	return errors.Trace(err)
diff --git a/statistics/histogram.go b/statistics/histogram.go
index a7a84bb5b2ca6..6411829b29f8b 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -214,7 +214,7 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 {
 
 // AppendBucket appends a bucket into `hg`.
 func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) {
-	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 1})
+	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 0})
 	hg.Bounds.AppendDatum(0, lower)
 	hg.Bounds.AppendDatum(0, upper)
 }
@@ -232,7 +232,15 @@ func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) {
 	hg.Bounds.AppendDatum(0, upper)
 	hg.Buckets[len-1].Count = count
 	hg.Buckets[len-1].Repeat = repeat
-	hg.Buckets[len-1].NDV++
+}
+
+func (hg *Histogram) updateLastBucketV2(upper *types.Datum, count, repeat int64) {
+	hg.updateLastBucket(upper, count, repeat)
+	l := hg.Len()
+	// The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here.
+	if hg.Buckets[l-1].NDV > 0 {
+		hg.Buckets[l-1].NDV++
+	}
 }
 
 // DecodeTo decodes the histogram bucket values into `Tp`.
@@ -383,6 +391,26 @@ func (hg *Histogram) ToString(idxCols int) string {
 
 // equalRowCount estimates the row count where the column equals to value.
 func (hg *Histogram) equalRowCount(value types.Datum) float64 {
+	index, match := hg.Bounds.LowerBound(0, &value)
+	// Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound.
+	if index%2 == 1 {
+		if match {
+			return float64(hg.Buckets[index/2].Repeat)
+		}
+		return hg.notNullCount() / float64(hg.NDV)
+	}
+	if match {
+		cmp := chunk.GetCompareFunc(hg.Tp)
+		if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 {
+			return float64(hg.Buckets[index/2].Repeat)
+		}
+		return hg.notNullCount() / float64(hg.NDV)
+	}
+	return 0
+}
+
+// equalRowCountV2 estimates the row count where the column equals to value.
+func (hg *Histogram) equalRowCountV2(value types.Datum) float64 {
 	index, match := hg.Bounds.LowerBound(0, &value)
 	// Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound.
 	if index%2 == 1 {
@@ -703,7 +731,7 @@ func (hg *Histogram) IsIndexHist() bool {
 }
 
 // MergeHistograms merges two histograms.
-func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error) {
+func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int, statsVer int) (*Histogram, error) {
 	if lh.Len() == 0 {
 		return rh, nil
 	}
@@ -719,7 +747,9 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram,
 	offset := int64(0)
 	if cmp == 0 {
 		lh.NDV--
-		lh.Buckets[len(lh.Buckets)-1].NDV--
+		if rh.Buckets[0].NDV > 0 {
+			lh.Buckets[lLen-1].NDV += rh.Buckets[0].NDV - 1
+		}
 		lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat)
 		offset = rh.Buckets[0].Count
 		rh.popFirstBucket()
@@ -746,6 +776,10 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram,
 		rAvg *= 2
 	}
 	for i := 0; i < rh.Len(); i++ {
+		if statsVer == Version2 {
+			lh.AppendBucketWithNDV(rh.GetLower(i), rh.GetUpper(i), rh.Buckets[i].Count+lCount-offset, rh.Buckets[i].Repeat, rh.Buckets[i].NDV)
+			continue
+		}
 		lh.AppendBucket(rh.GetLower(i), rh.GetUpper(i), rh.Buckets[i].Count+lCount-offset, rh.Buckets[i].Repeat)
 	}
 	for lh.Len() > bucketSize {
@@ -1036,9 +1070,17 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 {
 	if idx.NDV > 0 && idx.outOfRange(val) {
 		return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
 	}
-	if idx.CMSketch != nil && (len(idx.Histogram.Buckets) == 0 || idx.Histogram.Buckets[0].NDV == 0) {
+	if idx.CMSketch != nil && idx.StatsVer == Version1 {
 		return float64(idx.QueryBytes(b))
 	}
+	// If it's version2, query the top-n first.
+	if idx.StatsVer == Version2 {
+		count, found := idx.TopN.QueryTopN(b)
+		if found {
+			return float64(count)
+		}
+		return idx.Histogram.equalRowCountV2(val)
+	}
 	return idx.Histogram.equalRowCount(val)
 }
 
diff --git a/statistics/sample_test.go b/statistics/sample_test.go
index 34d3f31117db9..1a9647505b547 100644
--- a/statistics/sample_test.go
+++ b/statistics/sample_test.go
@@ -60,7 +60,7 @@ func (s *testSampleSuite) TestCollectColumnStats(c *C) {
 		Sc:              sc,
 		RecordSet:       s.rs,
 		ColLen:          1,
-		PkBuilder:       NewSortedBuilder(sc, 256, 1, types.NewFieldType(mysql.TypeLonglong)),
+		PkBuilder:       NewSortedBuilder(sc, 256, 1, types.NewFieldType(mysql.TypeLonglong), Version2),
 		MaxSampleSize:   10000,
 		MaxBucketSize:   256,
 		MaxFMSketchSize: 1000,
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 60f4e32a9eb03..460d7d1a2c44f 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -180,7 +180,7 @@ func encodeKey(key types.Datum) types.Datum {
 }
 
 func buildPK(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.RecordSet) (int64, *Histogram, error) {
-	b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeLonglong))
+	b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeLonglong), Version1)
 	ctx := context.Background()
 	for {
 		req := records.NewChunk()
@@ -204,7 +204,7 @@ func buildPK(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.Reco
 }
 
 func buildIndex(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.RecordSet) (int64, *Histogram, *CMSketch, error) {
-	b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeBlob))
+	b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeBlob), Version1)
 	cms := NewCMSketch(8, 2048)
 	ctx := context.Background()
 	req := records.NewChunk()
@@ -403,7 +403,7 @@ func (s *testStatisticsSuite) TestMergeHistogram(c *C) {
 	for _, t := range tests {
 		lh := mockHistogram(t.leftLower, t.leftNum)
 		rh := mockHistogram(t.rightLower, t.rightNum)
-		h, err := MergeHistograms(sc, lh, rh, bucketCount)
+		h, err := MergeHistograms(sc, lh, rh, bucketCount, Version1)
 		c.Assert(err, IsNil)
 		c.Assert(h.NDV, Equals, t.ndv)
 		c.Assert(h.Len(), Equals, t.bucketNum)
diff --git a/statistics/table.go b/statistics/table.go
index 79cba7ae26456..afaa2afffe083 100644
--- a/statistics/table.go
+++ b/statistics/table.go
@@ -428,7 +428,7 @@ func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idx
 	}
 	var result float64
 	var err error
-	if idx.CMSketch != nil && idx.StatsVer != Version0 {
+	if idx.CMSketch != nil && idx.StatsVer == Version1 {
 		result, err = coll.getIndexRowCount(sc, idxID, indexRanges)
 	} else {
 		result, err = idx.GetRowCount(sc, indexRanges, coll.ModifyCount)
diff --git a/store/mockstore/mocktikv/analyze.go b/store/mockstore/mocktikv/analyze.go
index fa0d9384694c8..a575f5536015d 100644
--- a/store/mockstore/mocktikv/analyze.go
+++ b/store/mockstore/mocktikv/analyze.go
@@ -81,7 +81,7 @@ func (h *rpcHandler) handleAnalyzeIndexReq(req *coprocessor.Request, analyzeReq
 		execDetail:     new(execDetail),
 		hdStatus:       tablecodec.HandleNotNeeded,
 	}
-	statsBuilder := statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob))
+	statsBuilder := statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), statistics.Version1)
 	var cms *statistics.CMSketch
 	if analyzeReq.IdxReq.CmsketchDepth != nil && analyzeReq.IdxReq.CmsketchWidth != nil {
 		cms = statistics.NewCMSketch(*analyzeReq.IdxReq.CmsketchDepth, *analyzeReq.IdxReq.CmsketchWidth)
@@ -212,7 +212,7 @@ func (h *rpcHandler) handleAnalyzeColumnsReq(req *coprocessor.Request, analyzeRe
 		ColsFieldType:   fts,
 	}
 	if pkID != -1 {
-		builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob))
+		builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob), statistics.Version1)
 	}
 	if colReq.CmsketchWidth != nil && colReq.CmsketchDepth != nil {
 		builder.CMSketchWidth = *colReq.CmsketchWidth
diff --git a/store/mockstore/unistore/cophandler/analyze.go b/store/mockstore/unistore/cophandler/analyze.go
index 329335a70af5e..f2e980023163b 100644
--- a/store/mockstore/unistore/cophandler/analyze.go
+++ b/store/mockstore/unistore/cophandler/analyze.go
@@ -83,7 +83,7 @@ func handleAnalyzeIndexReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, anal
 	}
 	processor := &analyzeIndexProcessor{
 		colLen:       int(analyzeReq.IdxReq.NumColumns),
-		statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)),
+		statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), int(statsVer)),
 		statsVer:     statsVer,
 	}
 	if analyzeReq.IdxReq.TopNSize != nil {
@@ -133,9 +133,13 @@ func handleAnalyzeIndexReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, anal
 }
 
 func handleAnalyzeCommonHandleReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, analyzeReq *tipb.AnalyzeReq, startTS uint64) (*coprocessor.Response, error) {
+	statsVer := statistics.Version1
+	if analyzeReq.IdxReq.Version != nil {
+		statsVer = int(*analyzeReq.IdxReq.Version)
+	}
 	processor := &analyzeCommonHandleProcessor{
 		colLen:       int(analyzeReq.IdxReq.NumColumns),
-		statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)),
+		statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), statsVer),
 	}
 	if analyzeReq.IdxReq.CmsketchDepth != nil && analyzeReq.IdxReq.CmsketchWidth != nil {
 		processor.cms = statistics.NewCMSketch(*analyzeReq.IdxReq.CmsketchDepth, *analyzeReq.IdxReq.CmsketchWidth)
@@ -308,8 +312,12 @@ func handleAnalyzeColumnsReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, an
 		Collators:       collators,
 		ColsFieldType:   fts,
 	}
+	statsVer := statistics.Version1
+	if analyzeReq.ColReq.Version != nil {
+		statsVer = int(*analyzeReq.ColReq.Version)
+	}
 	if pkID != -1 {
-		builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob))
+		builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob), statsVer)
 	}
 	if colReq.CmsketchWidth != nil && colReq.CmsketchDepth != nil {
 		builder.CMSketchWidth = *colReq.CmsketchWidth
diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go
index 48c56389b59c6..3b0f70d3a6cae 100644
--- a/store/mockstore/unistore/cophandler/closure_exec.go
+++ b/store/mockstore/unistore/cophandler/closure_exec.go
@@ -844,7 +844,7 @@ type idxScanCtx struct {
 	colInfos         []rowcodec.ColInfo
 	primaryColumnIds []int64
 	execDetail       *execDetail
- 	collectNDV       bool
+	collectNDV       bool
 	prevVals         [][]byte
 }
 

From 05d588ba4a2dc4207165156750e3862f1640c618 Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Tue, 15 Dec 2020 13:58:08 +0800
Subject: [PATCH 4/9] fix ndv when extract topn out of hist

---
 statistics/histogram.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/statistics/histogram.go b/statistics/histogram.go
index 6411829b29f8b..658a84b1ed48e 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -362,6 +362,9 @@ func (hg *Histogram) RemoveIdxVals(idxValCntPairs []TopNMeta) {
 				break
 			}
 			totalSubCnt += int64(idxValCntPairs[pairIdx].Count)
+			if hg.Buckets[bktIdx].NDV > 0 {
+				hg.Buckets[bktIdx].NDV--
+			}
 			pairIdx++
 			if cmpResult == 0 {
 				hg.Buckets[bktIdx].Repeat = 0

From 73262224188f52e01acb68dcf70e80b7e6cf3e9b Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Tue, 22 Dec 2020 02:03:16 +0800
Subject: [PATCH 5/9] address comments and fix

---
 session/bootstrap.go                          |  2 +-
 statistics/builder.go                         | 13 +++--
 statistics/histogram.go                       | 54 ++++++-------------
 statistics/statistics_test.go                 |  8 +--
 statistics/table.go                           |  3 --
 .../unistore/cophandler/cop_handler.go        |  2 +-
 6 files changed, 27 insertions(+), 55 deletions(-)

diff --git a/session/bootstrap.go b/session/bootstrap.go
index c0b037dcdcadd..f752e7c048eea 100644
--- a/session/bootstrap.go
+++ b/session/bootstrap.go
@@ -1240,7 +1240,7 @@ func writeMemoryQuotaQuery(s Session) {
 }
 
 func upgradeToVer57(s Session, ver int64) {
-	if ver >= version53 {
+	if ver >= version57 {
 		return
 	}
 	doReentrantDDL(s, "ALTER TABLE mysql.stats_buckets ADD COLUMN `ndv` bigint not null default 0", infoschema.ErrColumnExists)
diff --git a/statistics/builder.go b/statistics/builder.go
index ce57023d80198..a21b39be7bc87 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -30,6 +30,7 @@ type SortedBuilder struct {
 	Count           int64
 	hist            *Histogram
 	statsVer        int
+	needBucketNDV   bool
 }
 
 // NewSortedBuilder creates a new SortedBuilder.
@@ -39,7 +40,7 @@ func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *ty
 		numBuckets:      numBuckets,
 		valuesPerBucket: 1,
 		hist:            NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0),
-		statsVer:        statsVer,
+		needBucketNDV:        statsVer == Version2,
 	}
 }
 
@@ -52,9 +53,7 @@ func (b *SortedBuilder) Hist() *Histogram {
 func (b *SortedBuilder) Iterate(data types.Datum) error {
 	b.Count++
 	appendBucket := b.hist.AppendBucket
-	updateLastBucket := b.hist.updateLastBucket
-	if b.statsVer == Version2 {
-		updateLastBucket = b.hist.updateLastBucketV2
+	if b.needBucketNDV {
 		appendBucket = func(lower, upper *types.Datum, count, repeat int64) {
 			b.hist.AppendBucketWithNDV(lower, upper, count, repeat, 1)
 		}
@@ -76,7 +75,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error {
 		b.hist.Buckets[b.bucketIdx].Repeat++
 	} else if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket {
 		// The bucket still have room to store a new item, update the bucket.
-		updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
+		b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1, b.needBucketNDV)
 		b.hist.NDV++
 	} else {
 		// All buckets are full, we should merge buckets.
@@ -92,7 +91,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error {
 		}
 		// We may merge buckets, so we should check it again.
 		if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket {
-			updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1)
+			b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1, b.needBucketNDV)
 		} else {
 			b.lastNumber = b.hist.Buckets[b.bucketIdx].Count
 			b.bucketIdx++
@@ -160,7 +159,7 @@ func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *Sa
 			}
 		} else if totalCount-float64(lastCount) <= valuesPerBucket {
 			// The bucket still have room to store a new item, update the bucket.
-			hg.updateLastBucket(&samples[i].Value, int64(totalCount), int64(ndvFactor))
+			hg.updateLastBucket(&samples[i].Value, int64(totalCount), int64(ndvFactor), false)
 		} else {
 			lastCount = hg.Buckets[bucketIdx].Count
 			// The bucket is full, store the item in the next bucket.
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 658a84b1ed48e..02b775ce47004 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -226,21 +226,16 @@ func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum,
 	hg.Bounds.AppendDatum(0, upper)
 }
 
-func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) {
-	len := hg.Len()
-	hg.Bounds.TruncateTo(2*len - 1)
-	hg.Bounds.AppendDatum(0, upper)
-	hg.Buckets[len-1].Count = count
-	hg.Buckets[len-1].Repeat = repeat
-}
-
-func (hg *Histogram) updateLastBucketV2(upper *types.Datum, count, repeat int64) {
-	hg.updateLastBucket(upper, count, repeat)
+func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64, needBucketNDV bool) {
 	l := hg.Len()
+	hg.Bounds.TruncateTo(2*l-1)
+	hg.Bounds.AppendDatum(0, upper)
 	// The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here.
-	if hg.Buckets[l-1].NDV > 0 {
+	if needBucketNDV && hg.Buckets[l-1].NDV > 0 {
 		hg.Buckets[l-1].NDV++
 	}
+	hg.Buckets[l-1].Count = count
+	hg.Buckets[l-1].Repeat = repeat
 }
 
 // DecodeTo decodes the histogram bucket values into `Tp`.
@@ -393,34 +388,14 @@ func (hg *Histogram) ToString(idxCols int) string {
 }
 
 // equalRowCount estimates the row count where the column equals to value.
-func (hg *Histogram) equalRowCount(value types.Datum) float64 {
-	index, match := hg.Bounds.LowerBound(0, &value)
-	// Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound.
-	if index%2 == 1 {
-		if match {
-			return float64(hg.Buckets[index/2].Repeat)
-		}
-		return hg.notNullCount() / float64(hg.NDV)
-	}
-	if match {
-		cmp := chunk.GetCompareFunc(hg.Tp)
-		if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 {
-			return float64(hg.Buckets[index/2].Repeat)
-		}
-		return hg.notNullCount() / float64(hg.NDV)
-	}
-	return 0
-}
-
-// equalRowCountV2 estimates the row count where the column equals to value.
-func (hg *Histogram) equalRowCountV2(value types.Datum) float64 {
+func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64 {
 	index, match := hg.Bounds.LowerBound(0, &value)
 	// Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound.
 	if index%2 == 1 {
 		if match {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
-		if hg.Buckets[index/2].NDV > 0 {
+		if hasBucketNDV && hg.Buckets[index/2].NDV > 0 {
 			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
 		}
 		return hg.notNullCount() / float64(hg.NDV)
@@ -430,7 +405,7 @@ func (hg *Histogram) equalRowCountV2(value types.Datum) float64 {
 		if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
-		if hg.Buckets[index/2].NDV > 0 {
+		if hasBucketNDV && hg.Buckets[index/2].NDV > 0 {
 			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
 		}
 		return hg.notNullCount() / float64(hg.NDV)
@@ -439,8 +414,9 @@ func (hg *Histogram) equalRowCountV2(value types.Datum) float64 {
 }
 
 // greaterRowCount estimates the row count where the column greater than value.
+// It's deprecated and is only used in tests.
 func (hg *Histogram) greaterRowCount(value types.Datum) float64 {
-	gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value)
+	gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value, false)
 	return math.Max(0, gtCount)
 }
 
@@ -753,7 +729,7 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram,
 		if rh.Buckets[0].NDV > 0 {
 			lh.Buckets[lLen-1].NDV += rh.Buckets[0].NDV - 1
 		}
-		lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat)
+		lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat, false)
 		offset = rh.Buckets[0].Count
 		rh.popFirstBucket()
 	}
@@ -925,7 +901,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo
 		count, err := queryValue(sc, c.CMSketch, c.TopN, val)
 		return float64(count), errors.Trace(err)
 	}
-	return c.Histogram.equalRowCount(val), nil
+	return c.Histogram.equalRowCount(val, false), nil
 }
 
 // GetColumnRowCount estimates the row count by a slice of Range.
@@ -1082,9 +1058,9 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 {
 		if found {
 			return float64(count)
 		}
-		return idx.Histogram.equalRowCountV2(val)
+		return idx.Histogram.equalRowCount(val, true)
 	}
-	return idx.Histogram.equalRowCount(val)
+	return idx.Histogram.equalRowCount(val, false)
 }
 
 // QueryBytes is used to query the count of specified bytes.
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 460d7d1a2c44f..837cdae704a26 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -258,7 +258,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
 	checkRepeats(c, col)
 	col.PreCalculateScalar()
 	c.Check(col.Len(), Equals, 226)
-	count := col.equalRowCount(types.NewIntDatum(1000))
+	count := col.equalRowCount(types.NewIntDatum(1000), false)
 	c.Check(int(count), Equals, 0)
 	count = col.lessRowCount(types.NewIntDatum(1000))
 	c.Check(int(count), Equals, 10000)
@@ -270,7 +270,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
 	c.Check(int(count), Equals, 100000)
 	count = col.greaterRowCount(types.NewIntDatum(200000000))
 	c.Check(count, Equals, 0.0)
-	count = col.equalRowCount(types.NewIntDatum(200000000))
+	count = col.equalRowCount(types.NewIntDatum(200000000), false)
 	c.Check(count, Equals, 0.0)
 	count = col.BetweenRowCount(types.NewIntDatum(3000), types.NewIntDatum(3500))
 	c.Check(int(count), Equals, 4994)
@@ -300,7 +300,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
 	checkRepeats(c, col)
 	col.PreCalculateScalar()
 	c.Check(int(tblCount), Equals, 100000)
-	count = col.equalRowCount(encodeKey(types.NewIntDatum(10000)))
+	count = col.equalRowCount(encodeKey(types.NewIntDatum(10000)), false)
 	c.Check(int(count), Equals, 1)
 	count = col.lessRowCount(encodeKey(types.NewIntDatum(20000)))
 	c.Check(int(count), Equals, 19999)
@@ -317,7 +317,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
 	checkRepeats(c, col)
 	col.PreCalculateScalar()
 	c.Check(int(tblCount), Equals, 100000)
-	count = col.equalRowCount(types.NewIntDatum(10000))
+	count = col.equalRowCount(types.NewIntDatum(10000), false)
 	c.Check(int(count), Equals, 1)
 	count = col.lessRowCount(types.NewIntDatum(20000))
 	c.Check(int(count), Equals, 20000)
diff --git a/statistics/table.go b/statistics/table.go
index afaa2afffe083..0d104084a4e55 100644
--- a/statistics/table.go
+++ b/statistics/table.go
@@ -632,9 +632,6 @@ func (coll *HistColl) getEqualCondSelectivity(sc *stmtctx.StatementContext, idx
 		}
 		return outOfRangeEQSelectivity(ndv, coll.ModifyCount, int64(idx.TotalRowCount())), nil
 	}
-	if coverAll && len(idx.Histogram.Buckets) > 0 && idx.Histogram.Buckets[0].NDV > 0 {
-		return idx.Histogram.equalRowCount(val), nil
-	}
 
 	minRowCount, crossValidationSelectivity, err := coll.crossValidationSelectivity(sc, idx, usedColsLen, idxPointRange)
 	if err != nil {
diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go
index 1dc48d6f3e657..40cb1a8c7203a 100644
--- a/store/mockstore/unistore/cophandler/cop_handler.go
+++ b/store/mockstore/unistore/cophandler/cop_handler.go
@@ -147,7 +147,7 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt
 		}
 		return nil
 	}
-	return buildResp(chunks, closureExec, []int64{}, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
+	return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
 }
 
 func buildDAG(reader *dbreader.DBReader, lockStore *lockstore.MemStore, req *coprocessor.Request) (*dagContext, *tipb.DAGRequest, error) {

From da257227f5fcd726e65b99492bbd656d11c0680d Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Wed, 23 Dec 2020 14:13:08 +0800
Subject: [PATCH 6/9] address comments

---
 distsql/select_result.go    |  1 -
 go.mod                      |  2 +-
 go.sum                      |  2 ++
 session/bootstrap.go        |  1 +
 statistics/builder.go       |  3 +--
 statistics/handle/update.go |  6 ++----
 statistics/histogram.go     | 10 +++++-----
 7 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/distsql/select_result.go b/distsql/select_result.go
index 7208928732f60..fd1cd7c846931 100644
--- a/distsql/select_result.go
+++ b/distsql/select_result.go
@@ -147,7 +147,6 @@ func (r *selectResult) fetchResp(ctx context.Context) error {
 			sc.AppendWarning(dbterror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg))
 		}
 		if r.feedback != nil {
-			// logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs))
 			r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs)
 		}
 		r.partialCount++
diff --git a/go.mod b/go.mod
index 5d618f226c23f..3fe6961f639b7 100644
--- a/go.mod
+++ b/go.mod
@@ -51,7 +51,7 @@ require (
 	github.com/pingcap/sysutil v0.0.0-20201130064824-f0c8aa6a6966
 	github.com/pingcap/tidb-lightning v4.0.9-0.20201106041742-a1ac97827a27+incompatible
 	github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible
-	github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92
+	github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4
 	github.com/prometheus/client_golang v1.5.1
 	github.com/prometheus/client_model v0.2.0
 	github.com/prometheus/common v0.9.1
diff --git a/go.sum b/go.sum
index 3aa9291da5399..7fd465e28e730 100644
--- a/go.sum
+++ b/go.sum
@@ -715,6 +715,8 @@ github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217 h1:Ophn4Ud/QHp1BH0FJO
 github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92 h1:+EomCEPnE5MI0HD10wyoiYj1At57midQ4TagtvV9bmY=
 github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
+github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4 h1:x64INZ8imEXO3MFcWD99lYlp52V9ZdYrxj74ynfyg3c=
+github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tiup v1.2.3 h1:8OCQF7sHhT6VqE8pZU1JTSogPA90OFuWWM/B746x0YY=
 github.com/pingcap/tiup v1.2.3/go.mod h1:q8WzflNHjE1U49k2qstTL0clx2pKh8pkOzUFV4RTvQo=
 github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA=
diff --git a/session/bootstrap.go b/session/bootstrap.go
index d361089102319..413c143b9eca7 100644
--- a/session/bootstrap.go
+++ b/session/bootstrap.go
@@ -509,6 +509,7 @@ var (
 		upgradeToVer56,
 		upgradeToVer57,
 		upgradeToVer58,
+		upgradeToVer59,
 	}
 )
 
diff --git a/statistics/builder.go b/statistics/builder.go
index a21b39be7bc87..c58c7dc286428 100644
--- a/statistics/builder.go
+++ b/statistics/builder.go
@@ -29,7 +29,6 @@ type SortedBuilder struct {
 	bucketIdx       int64
 	Count           int64
 	hist            *Histogram
-	statsVer        int
 	needBucketNDV   bool
 }
 
@@ -40,7 +39,7 @@ func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *ty
 		numBuckets:      numBuckets,
 		valuesPerBucket: 1,
 		hist:            NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0),
-		needBucketNDV:        statsVer == Version2,
+		needBucketNDV:   statsVer == Version2,
 	}
 }
 
diff --git a/statistics/handle/update.go b/statistics/handle/update.go
index b27923f6da3a5..409e06efcbda0 100644
--- a/statistics/handle/update.go
+++ b/statistics/handle/update.go
@@ -162,9 +162,9 @@ func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSi
 
 var (
 	// MinLogScanCount is the minimum scan count for a feedback to be logged.
-	MinLogScanCount = int64(1)
+	MinLogScanCount = int64(1000)
 	// MinLogErrorRate is the minimum error rate for a feedback to be logged.
-	MinLogErrorRate = 0.0
+	MinLogErrorRate = 0.5
 )
 
 // StoreQueryFeedback merges the feedback into stats collector.
@@ -549,7 +549,6 @@ func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error {
 // feedback locally on this tidb-server, so it could be used more timely.
 func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 	h.sweepList()
-	logutil.BgLogger().Warn("local feedback update")
 	for _, fbs := range h.feedback.Feedbacks {
 		for _, fb := range fbs {
 			h.mu.Lock()
@@ -565,7 +564,6 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) {
 				if !ok || idx.Histogram.Len() == 0 {
 					continue
 				}
-				logutil.BgLogger().Warn("local feedback update index")
 				newIdx := *idx
 				eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback)
 				// For StatsVersion higher than Version1, the topn is extracted out of histogram. So we don't update the histogram if the feedback overlaps with some topn.
diff --git a/statistics/histogram.go b/statistics/histogram.go
index a1dc7cd7110b8..8e4ba627cfb61 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -225,7 +225,7 @@ func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum,
 
 func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64, needBucketNDV bool) {
 	l := hg.Len()
-	hg.Bounds.TruncateTo(2*l-1)
+	hg.Bounds.TruncateTo(2*l - 1)
 	hg.Bounds.AppendDatum(0, upper)
 	// The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here.
 	if needBucketNDV && hg.Buckets[l-1].NDV > 0 {
@@ -392,8 +392,8 @@ func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64
 		if match {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
-		if hasBucketNDV && hg.Buckets[index/2].NDV > 0 {
-			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
+		if hasBucketNDV && hg.Buckets[index/2].NDV > 1 {
+			return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1)
 		}
 		return hg.notNullCount() / float64(hg.NDV)
 	}
@@ -402,8 +402,8 @@ func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64
 		if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 {
 			return float64(hg.Buckets[index/2].Repeat)
 		}
-		if hasBucketNDV && hg.Buckets[index/2].NDV > 0 {
-			return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV)
+		if hasBucketNDV && hg.Buckets[index/2].NDV > 1 {
+			return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1)
 		}
 		return hg.notNullCount() / float64(hg.NDV)
 	}

From 1bf01759e6260e85846a7d94d94b365b0144d5ff Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Wed, 23 Dec 2020 21:40:36 +0800
Subject: [PATCH 7/9] fix tests and address comments

---
 .../r/explain_complex_stats.result            | 16 +--
 cmd/explaintest/r/explain_easy_stats.result   | 22 ++---
 cmd/explaintest/r/explain_indexmerge.result   |  6 +-
 cmd/explaintest/r/explain_join_stats.result   | 18 ++--
 executor/analyze_test.go                      | 97 ++++++++++---------
 executor/show_stats_test.go                   | 24 ++---
 planner/core/cbo_test.go                      |  8 +-
 planner/core/testdata/analyze_suite_out.json  | 16 ++-
 .../core/testdata/integration_suite_out.json  |  4 +-
 statistics/feedback.go                        | 15 +--
 statistics/handle/update_test.go              | 92 +++++++++---------
 statistics/histogram.go                       |  2 +-
 statistics/histogram_test.go                  | 28 +++---
 statistics/statistics_test.go                 |  2 +-
 statistics/testdata/stats_suite_out.json      |  8 +-
 .../unistore/cophandler/cop_handler.go        |  5 +
 util/testkit/testkit.go                       |  2 +-
 17 files changed, 183 insertions(+), 182 deletions(-)

diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result
index 7cacac73febc0..aed18d787a36c 100644
--- a/cmd/explaintest/r/explain_complex_stats.result
+++ b/cmd/explaintest/r/explain_complex_stats.result
@@ -115,14 +115,14 @@ PRIMARY KEY (aid,dic)
 load stats 's/explain_complex_stats_rr.json';
 explain SELECT ds, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(dic) as install_device FROM dt use index (cm) WHERE (ds >= '2016-09-01') AND (ds <= '2016-11-03') AND (cm IN ('1062', '1086', '1423', '1424', '1425', '1426', '1427', '1428', '1429', '1430', '1431', '1432', '1433', '1434', '1435', '1436', '1437', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1448', '1449', '1450', '1451', '1452', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1550', '1551', '1552', '1553', '1554', '1555', '1556', '1557', '1558', '1559', '1597', '1598', '1599', '1600', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1709', '1719', '1720', '1843', '2813', '2814', '2815', '2816', '2817', '2818', '2819', '2820', '2821', '2822', '2823', '2824', '2825', '2826', '2827', '2828', '2829', '2830', '2831', '2832', '2833', '2834', '2835', '2836', '2837', '2838', '2839', '2840', '2841', '2842', '2843', '2844', '2845', '2846', '2847', '2848', '2849', '2850', '2851', '2852', '2853', '2854', '2855', '2856', '2857', '2858', '2859', '2860', '2861', '2862', '2863', '2864', '2865', '2866', '2867', '2868', '2869', '2870', '2871', '2872', '3139', '3140', '3141', '3142', '3143', '3144', '3145', '3146', '3147', '3148', '3149', '3150', '3151', '3152', '3153', '3154', '3155', '3156', '3157', '3158', '3386', '3387', '3388', '3389', '3390', '3391', '3392', '3393', '3394', '3395', '3664', '3665', '3666', '3667', '3668', '3670', '3671', '3672', '3673', '3674', '3676', '3677', '3678', '3679', '3680', '3681', '3682', '3683', '3684', '3685', '3686', '3687', '3688', '3689', '3690', '3691', '3692', '3693', '3694', '3695', '3696', '3697', '3698', '3699', '3700', '3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714', '3715', '3960', '3961', 
'3962', '3963', '3964', '3965', '3966', '3967', '3968', '3978', '3979', '3980', '3981', '3982', '3983', '3984', '3985', '3986', '3987', '4208', '4209', '4210', '4211', '4212', '4304', '4305', '4306', '4307', '4308', '4866', '4867', '4868', '4869', '4870', '4871', '4872', '4873', '4874', '4875')) GROUP BY ds, p1, p2, p3, p4, p5, p6_md5, p7_md5 ORDER BY ds2 DESC;
 id	estRows	task	access object	operator info
-Projection_7	308.93	root		test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21
-└─Sort_8	308.93	root		test.dt.ds2:desc
-  └─HashAgg_16	308.93	root		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5
-    └─IndexLookUp_17	308.93	root		
-      ├─IndexRangeScan_13(Build)	1841.60	cop[tikv]	table:dt, index:cm(cm)	range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], 
[3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false
-      └─HashAgg_11(Probe)	308.93	cop[tikv]		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34
-        └─Selection_15	309.39	cop[tikv]		ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000)
-          └─TableRowIDScan_14	1841.60	cop[tikv]	table:dt	keep order:false
+Projection_7	21.53	root		test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21
+└─Sort_8	21.53	root		test.dt.ds2:desc
+  └─HashAgg_16	21.53	root		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5
+    └─IndexLookUp_17	21.53	root		
+      ├─IndexRangeScan_13(Build)	128.32	cop[tikv]	table:dt, index:cm(cm)	range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], 
[3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false
+      └─HashAgg_11(Probe)	21.53	cop[tikv]		group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34
+        └─Selection_15	21.56	cop[tikv]		ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000)
+          └─TableRowIDScan_14	128.32	cop[tikv]	table:dt	keep order:false
 explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext, gad.t as gtime from st gad join (select id, aid, pt, dic, ip, t from dd where pt = 'android' and bm = 0 and t > 1478143908) sdk on  gad.aid = sdk.aid and gad.ip = sdk.ip and sdk.t > gad.t where gad.t > 1478143908 and gad.bm = 0 and gad.pt = 'android' group by gad.aid, sdk.dic limit 2500;
 id	estRows	task	access object	operator info
 Projection_13	424.00	root		test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t
diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result
index a0d1e57cb8379..c3a46f969837b 100644
--- a/cmd/explaintest/r/explain_easy_stats.result
+++ b/cmd/explaintest/r/explain_easy_stats.result
@@ -42,16 +42,16 @@ TableReader_6	1999.00	root		data:TableRangeScan_5
 └─TableRangeScan_5	1999.00	cop[tikv]	table:t1	range:(0,+inf], keep order:false
 explain select t1.c1, t1.c2 from t1 where t1.c2 = 1;
 id	estRows	task	access object	operator info
-IndexReader_6	8.00	root		index:IndexRangeScan_5
-└─IndexRangeScan_5	8.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
+IndexReader_6	0.00	root		index:IndexRangeScan_5
+└─IndexRangeScan_5	0.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
 explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1;
 id	estRows	task	access object	operator info
 HashJoin_22	2481.25	root		left outer join, equal:[eq(test.t1.c2, test.t2.c1)]
 ├─TableReader_36(Build)	1985.00	root		data:Selection_35
 │ └─Selection_35	1985.00	cop[tikv]		not(isnull(test.t2.c1))
 │   └─TableFullScan_34	1985.00	cop[tikv]	table:t2	keep order:false
-└─TableReader_33(Probe)	1991.00	root		data:TableRangeScan_32
-  └─TableRangeScan_32	1991.00	cop[tikv]	table:t1	range:(1,+inf], keep order:false
+└─TableReader_33(Probe)	1998.00	root		data:TableRangeScan_32
+  └─TableRangeScan_32	1998.00	cop[tikv]	table:t1	range:(1,+inf], keep order:false
 explain update t1 set t1.c2 = 2 where t1.c1 = 1;
 id	estRows	task	access object	operator info
 Update_3	N/A	root		N/A
@@ -59,9 +59,9 @@ Update_3	N/A	root		N/A
 explain delete from t1 where t1.c2 = 1;
 id	estRows	task	access object	operator info
 Delete_4	N/A	root		N/A
-└─IndexLookUp_11	8.00	root		
-  ├─IndexRangeScan_9(Build)	8.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
-  └─TableRowIDScan_10(Probe)	8.00	cop[tikv]	table:t1	keep order:false
+└─IndexLookUp_11	0.00	root		
+  ├─IndexRangeScan_9(Build)	0.00	cop[tikv]	table:t1, index:c2(c2)	range:[1,1], keep order:false
+  └─TableRowIDScan_10(Probe)	0.00	cop[tikv]	table:t1	keep order:false
 explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1;
 id	estRows	task	access object	operator info
 Projection_11	1985.00	root		cast(Column#8, bigint(21) BINARY)->Column#7
@@ -80,10 +80,10 @@ TopN_7	1.00	root		test.t2.c2, offset:0, count:1
     └─TableFullScan_13	1985.00	cop[tikv]	table:t2	keep order:false
 explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1;
 id	estRows	task	access object	operator info
-IndexLookUp_11	0.51	root		
-├─IndexRangeScan_8(Build)	1.00	cop[tikv]	table:t1, index:c2(c2)	range:(1 1,1 +inf], keep order:false
-└─Selection_10(Probe)	0.51	cop[tikv]		lt(test.t1.c3, 1)
-  └─TableRowIDScan_9	1.00	cop[tikv]	table:t1	keep order:false
+IndexLookUp_11	0.00	root		
+├─IndexRangeScan_8(Build)	0.00	cop[tikv]	table:t1, index:c2(c2)	range:(1 1,1 +inf], keep order:false
+└─Selection_10(Probe)	0.00	cop[tikv]		lt(test.t1.c3, 1)
+  └─TableRowIDScan_9	0.00	cop[tikv]	table:t1	keep order:false
 explain select * from t1 where c1 = 1 and c2 > 1;
 id	estRows	task	access object	operator info
 Selection_6	0.50	root		gt(test.t1.c2, 1)
diff --git a/cmd/explaintest/r/explain_indexmerge.result b/cmd/explaintest/r/explain_indexmerge.result
index d7eeb2c6f7bd8..ef7f6cdf80088 100644
--- a/cmd/explaintest/r/explain_indexmerge.result
+++ b/cmd/explaintest/r/explain_indexmerge.result
@@ -99,11 +99,11 @@ label = "cop"
 set session tidb_enable_index_merge = off;
 explain select /*+ use_index_merge(t, primary, tb, tc) */ * from t where a <= 500000 or b <= 1000000 or c <= 3000000;
 id	estRows	task	access object	operator info
-IndexMerge_9	3570485.44	root		
-├─TableRangeScan_5(Build)	532767.00	cop[tikv]	table:t	range:[-inf,500000], keep order:false
+IndexMerge_9	3560000.00	root		
+├─TableRangeScan_5(Build)	500000.00	cop[tikv]	table:t	range:[-inf,500000], keep order:false
 ├─IndexRangeScan_6(Build)	1000000.00	cop[tikv]	table:t, index:tb(b)	range:[-inf,1000000], keep order:false
 ├─IndexRangeScan_7(Build)	3000000.00	cop[tikv]	table:t, index:tc(c)	range:[-inf,3000000], keep order:false
-└─TableRowIDScan_8(Probe)	3570485.44	cop[tikv]	table:t	keep order:false
+└─TableRowIDScan_8(Probe)	3560000.00	cop[tikv]	table:t	keep order:false
 explain select /*+ use_index_merge(t, tb, tc) */ * from t where b < 50 or c < 5000000;
 id	estRows	task	access object	operator info
 IndexMerge_8	4999999.00	root		
diff --git a/cmd/explaintest/r/explain_join_stats.result b/cmd/explaintest/r/explain_join_stats.result
index 262c52085f21b..723df63732dbe 100644
--- a/cmd/explaintest/r/explain_join_stats.result
+++ b/cmd/explaintest/r/explain_join_stats.result
@@ -7,25 +7,21 @@ load stats 's/explain_join_stats_lo.json';
 explain select count(*) from e, lo where lo.a=e.a and e.b=22336;
 id	estRows	task	access object	operator info
 StreamAgg_13	1.00	root		funcs:count(1)->Column#5
-└─HashJoin_89	20044.00	root		inner join, equal:[eq(test.lo.a, test.e.a)]
+└─HashJoin_89	19977.00	root		inner join, equal:[eq(test.lo.a, test.e.a)]
   ├─TableReader_50(Build)	250.00	root		data:TableFullScan_49
   │ └─TableFullScan_49	250.00	cop[tikv]	table:lo	keep order:false
-  └─IndexLookUp_61(Probe)	20044.00	root		
-    ├─IndexRangeScan_58(Build)	20044.00	cop[tikv]	table:e, index:idx_b(b)	range:[22336,22336], keep order:false
-    └─Selection_60(Probe)	20044.00	cop[tikv]		not(isnull(test.e.a))
-      └─TableRowIDScan_59	20044.00	cop[tikv]	table:e	keep order:false
+  └─IndexLookUp_61(Probe)	19977.00	root		
+    ├─IndexRangeScan_58(Build)	19977.00	cop[tikv]	table:e, index:idx_b(b)	range:[22336,22336], keep order:false
+    └─Selection_60(Probe)	19977.00	cop[tikv]		not(isnull(test.e.a))
+      └─TableRowIDScan_59	19977.00	cop[tikv]	table:e	keep order:false
 explain select /*+ TIDB_INLJ(e) */ count(*) from e, lo where lo.a=e.a and e.b=22336;
 id	estRows	task	access object	operator info
 StreamAgg_12	1.00	root		funcs:count(1)->Column#5
-<<<<<<< HEAD
-└─IndexJoin_56	20044.00	root		inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a
-=======
 └─IndexJoin_56	19977.00	root		inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a, equal cond:eq(test.lo.a, test.e.a)
->>>>>>> master
   ├─TableReader_40(Build)	250.00	root		data:TableFullScan_39
   │ └─TableFullScan_39	250.00	cop[tikv]	table:lo	keep order:false
-  └─IndexLookUp_55(Probe)	80.18	root		
+  └─IndexLookUp_55(Probe)	79.91	root		
     ├─Selection_53(Build)	4080.00	cop[tikv]		not(isnull(test.e.a))
     │ └─IndexRangeScan_51	4080.00	cop[tikv]	table:e, index:idx_a(a)	range: decided by [eq(test.e.a, test.lo.a)], keep order:false
-    └─Selection_54(Probe)	80.18	cop[tikv]		eq(test.e.b, 22336)
+    └─Selection_54(Probe)	79.91	cop[tikv]		eq(test.e.b, 22336)
       └─TableRowIDScan_52	4080.00	cop[tikv]	table:e	keep order:false
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
index 0c26ef3de110c..d9e654ecd5729 100644
--- a/executor/analyze_test.go
+++ b/executor/analyze_test.go
@@ -461,8 +461,8 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) {
 	tk.MustExec("insert into t2 values (0), (18446744073709551615)")
 	tk.MustExec("analyze table t2")
 	tk.MustQuery("show stats_buckets where table_name = 't2'").Check(testkit.Rows(
-		"test t2  a 0 0 1 1 0 0",
-		"test t2  a 0 1 2 1 18446744073709551615 18446744073709551615"))
+		"test t2  a 0 0 1 1 0 0 0",
+		"test t2  a 0 1 2 1 18446744073709551615 18446744073709551615 0"))
 
 	tk.MustExec(`set @@tidb_partition_prune_mode='` + string(variable.StaticOnly) + `'`)
 	tk.MustExec(`create table t3 (id int, v int, primary key(id), index k(v)) partition by hash (id) partitions 4`)
@@ -531,6 +531,7 @@ func (s *testSuite1) TestAnalyzeIncremental(c *C) {
 }
 
 func (s *testSuite1) TestAnalyzeIncrementalStreaming(c *C) {
+	c.Skip("unistore hasn't support streaming yet.")
 	tk := testkit.NewTestKit(c, s.store)
 	tk.MustExec("use test")
 	tk.Se.GetSessionVars().EnableStreaming = true
@@ -545,13 +546,13 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) {
 	tk.MustQuery("show stats_buckets").Check(testkit.Rows())
 	tk.MustExec("insert into t values (1,1)")
 	tk.MustExec("analyze incremental table t index")
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  idx 1 0 1 1 1 1"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  idx 1 0 1 1 1 1 0"))
 	tk.MustExec("insert into t values (2,2)")
 	tk.MustExec("analyze incremental table t index")
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 2 1 2 2", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  a 0 1 2 1 2 2 0", "test t  idx 1 0 1 1 1 1 0", "test t  idx 1 1 2 1 2 2 0"))
 	tk.MustExec("analyze incremental table t index")
 	// Result should not change.
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 2 1 2 2", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  a 0 1 2 1 2 2 0", "test t  idx 1 0 1 1 1 1 0", "test t  idx 1 1 2 1 2 2 0"))
 
 	// Test analyze incremental with feedback.
 	tk.MustExec("insert into t values (3,3)")
@@ -574,7 +575,7 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) {
 	c.Assert(h.DumpStatsFeedbackToKV(), IsNil)
 	c.Assert(h.HandleUpdateStats(is), IsNil)
 	c.Assert(h.Update(is), IsNil)
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 3 0 2 2147483647", "test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  a 0 1 3 0 2 2147483647 0", "test t  idx 1 0 1 1 1 1 0", "test t  idx 1 1 2 1 2 2 0"))
 	tblStats := h.GetTableStats(tblInfo)
 	val, err := codec.EncodeKey(tk.Se.GetSessionVars().StmtCtx, nil, types.NewIntDatum(3))
 	c.Assert(err, IsNil)
@@ -583,8 +584,8 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) {
 	c.Assert(statistics.IsAnalyzed(tblStats.Columns[tblInfo.Columns[0].ID].Flag), IsFalse)
 
 	tk.MustExec("analyze incremental table t index")
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  a 0 1 2 1 2 2", "test t  a 0 2 3 1 3 3",
-		"test t  idx 1 0 1 1 1 1", "test t  idx 1 1 2 1 2 2", "test t  idx 1 2 3 1 3 3"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  a 0 1 2 1 2 2 0", "test t  a 0 2 3 1 3 3 0",
+		"test t  idx 1 0 1 1 1 1 0", "test t  idx 1 1 2 1 2 2 0", "test t  idx 1 2 3 1 3 3 0"))
 	tblStats = h.GetTableStats(tblInfo)
 	c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].QueryBytes(val), Equals, uint64(1))
 }
@@ -760,36 +761,36 @@ func (s *testSuite1) TestNormalAnalyzeOnCommonHandle(c *C) {
 	tk.MustExec("analyze table t1, t2, t3")
 
 	tk.MustQuery(`show stats_buckets where table_name in ("t1", "t2", "t3")`).Sort().Check(testkit.Rows(
-		"test t1  a 0 0 1 1 1 1",
-		"test t1  a 0 1 2 1 2 2",
-		"test t1  a 0 2 3 1 3 3",
-		"test t1  b 0 0 1 1 1 1",
-		"test t1  b 0 1 2 1 2 2",
-		"test t1  b 0 2 3 1 3 3",
-		"test t2  PRIMARY 1 0 1 1 111 111",
-		"test t2  PRIMARY 1 1 2 1 222 222",
-		"test t2  PRIMARY 1 2 3 1 333 333",
-		"test t2  a 0 0 1 1 111 111",
-		"test t2  a 0 1 2 1 222 222",
-		"test t2  a 0 2 3 1 333 333",
-		"test t2  b 0 0 1 1 1 1",
-		"test t2  b 0 1 2 1 2 2",
-		"test t2  b 0 2 3 1 3 3",
-		"test t3  PRIMARY 1 0 1 1 (1, 1) (1, 1)",
-		"test t3  PRIMARY 1 1 2 1 (2, 2) (2, 2)",
-		"test t3  PRIMARY 1 2 3 1 (3, 3) (3, 3)",
-		"test t3  a 0 0 1 1 1 1",
-		"test t3  a 0 1 2 1 2 2",
-		"test t3  a 0 2 3 1 3 3",
-		"test t3  b 0 0 1 1 1 1",
-		"test t3  b 0 1 2 1 2 2",
-		"test t3  b 0 2 3 1 3 3",
-		"test t3  c 0 0 1 1 1 1",
-		"test t3  c 0 1 2 1 2 2",
-		"test t3  c 0 2 3 1 3 3",
-		"test t3  c 1 0 1 1 1 1",
-		"test t3  c 1 1 2 1 2 2",
-		"test t3  c 1 2 3 1 3 3"))
+		"test t1  a 0 0 1 1 1 1 0",
+		"test t1  a 0 1 2 1 2 2 0",
+		"test t1  a 0 2 3 1 3 3 0",
+		"test t1  b 0 0 1 1 1 1 0",
+		"test t1  b 0 1 2 1 2 2 0",
+		"test t1  b 0 2 3 1 3 3 0",
+		"test t2  PRIMARY 1 0 1 1 111 111 0",
+		"test t2  PRIMARY 1 1 2 1 222 222 0",
+		"test t2  PRIMARY 1 2 3 1 333 333 0",
+		"test t2  a 0 0 1 1 111 111 0",
+		"test t2  a 0 1 2 1 222 222 0",
+		"test t2  a 0 2 3 1 333 333 0",
+		"test t2  b 0 0 1 1 1 1 0",
+		"test t2  b 0 1 2 1 2 2 0",
+		"test t2  b 0 2 3 1 3 3 0",
+		"test t3  PRIMARY 1 0 1 1 (1, 1) (1, 1) 0",
+		"test t3  PRIMARY 1 1 2 1 (2, 2) (2, 2) 0",
+		"test t3  PRIMARY 1 2 3 1 (3, 3) (3, 3) 0",
+		"test t3  a 0 0 1 1 1 1 0",
+		"test t3  a 0 1 2 1 2 2 0",
+		"test t3  a 0 2 3 1 3 3 0",
+		"test t3  b 0 0 1 1 1 1 0",
+		"test t3  b 0 1 2 1 2 2 0",
+		"test t3  b 0 2 3 1 3 3 0",
+		"test t3  c 0 0 1 1 1 1 0",
+		"test t3  c 0 1 2 1 2 2 0",
+		"test t3  c 0 2 3 1 3 3 0",
+		"test t3  c 1 0 1 1 1 1 0",
+		"test t3  c 1 1 2 1 2 2 0",
+		"test t3  c 1 2 3 1 3 3 0"))
 }
 
 func (s *testSuite1) TestDefaultValForAnalyze(c *C) {
@@ -837,15 +838,15 @@ func (s *testSerialSuite2) TestIssue20874(c *C) {
 	tk.MustExec("insert into t values ('#', 'C'), ('$', 'c'), ('a', 'a')")
 	tk.MustExec("analyze table t")
 	tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check(testkit.Rows(
-		"test t  a 0 0 1 1 \x02\xd2 \x02\xd2",
-		"test t  a 0 1 2 1 \x0e\x0f \x0e\x0f",
-		"test t  a 0 2 3 1 \x0e3 \x0e3",
-		"test t  b 0 0 1 1 \x00A \x00A",
-		"test t  b 0 1 3 2 \x00C \x00C",
-		"test t  idxa 1 0 1 1 \x02\xd2 \x02\xd2",
-		"test t  idxa 1 1 2 1 \x0e\x0f \x0e\x0f",
-		"test t  idxa 1 2 3 1 \x0e3 \x0e3",
-		"test t  idxb 1 0 1 1 \x00A \x00A",
-		"test t  idxb 1 1 3 2 \x00C \x00C",
+		"test t  a 0 0 1 1 \x02\xd2 \x02\xd2 0",
+		"test t  a 0 1 2 1 \x0e\x0f \x0e\x0f 0",
+		"test t  a 0 2 3 1 \x0e3 \x0e3 0",
+		"test t  b 0 0 1 1 \x00A \x00A 0",
+		"test t  b 0 1 3 2 \x00C \x00C 0",
+		"test t  idxa 1 0 1 1 \x02\xd2 \x02\xd2 0",
+		"test t  idxa 1 1 2 1 \x0e\x0f \x0e\x0f 0",
+		"test t  idxa 1 2 3 1 \x0e3 \x0e3 0",
+		"test t  idxb 1 0 1 1 \x00A \x00A 0",
+		"test t  idxb 1 1 3 2 \x00C \x00C 0",
 	))
 }
diff --git a/executor/show_stats_test.go b/executor/show_stats_test.go
index 270c35f5abf2d..f21ada8ea2b1c 100644
--- a/executor/show_stats_test.go
+++ b/executor/show_stats_test.go
@@ -80,36 +80,36 @@ func (s *testShowStatsSuite) TestShowStatsBuckets(c *C) {
 	tk.MustExec("insert into t values (1,1)")
 	tk.MustExec("analyze table t")
 	result := tk.MustQuery("show stats_buckets").Sort()
-	result.Check(testkit.Rows("test t  a 0 0 1 1 1 1", "test t  b 0 0 1 1 1 1", "test t  idx 1 0 1 1 (1, 1) (1, 1)"))
+	result.Check(testkit.Rows("test t  a 0 0 1 1 1 1 0", "test t  b 0 0 1 1 1 1 0", "test t  idx 1 0 1 1 (1, 1) (1, 1) 0"))
 	result = tk.MustQuery("show stats_buckets where column_name = 'idx'")
-	result.Check(testkit.Rows("test t  idx 1 0 1 1 (1, 1) (1, 1)"))
+	result.Check(testkit.Rows("test t  idx 1 0 1 1 (1, 1) (1, 1) 0"))
 
 	tk.MustExec("drop table t")
 	tk.MustExec("create table t (`a` datetime, `b` int, key `idx`(`a`, `b`))")
 	tk.MustExec("insert into t values (\"2020-01-01\", 1)")
 	tk.MustExec("analyze table t")
 	result = tk.MustQuery("show stats_buckets").Sort()
-	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00", "test t  b 0 0 1 1 1 1", "test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)"))
+	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00 0", "test t  b 0 0 1 1 1 1 0", "test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0"))
 	result = tk.MustQuery("show stats_buckets where column_name = 'idx'")
-	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)"))
+	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0"))
 
 	tk.MustExec("drop table t")
 	tk.MustExec("create table t (`a` date, `b` int, key `idx`(`a`, `b`))")
 	tk.MustExec("insert into t values (\"2020-01-01\", 1)")
 	tk.MustExec("analyze table t")
 	result = tk.MustQuery("show stats_buckets").Sort()
-	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 2020-01-01", "test t  b 0 0 1 1 1 1", "test t  idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1)"))
+	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 2020-01-01 0", "test t  b 0 0 1 1 1 1 0", "test t  idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1) 0"))
 	result = tk.MustQuery("show stats_buckets where column_name = 'idx'")
-	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1)"))
+	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1) 0"))
 
 	tk.MustExec("drop table t")
 	tk.MustExec("create table t (`a` timestamp, `b` int, key `idx`(`a`, `b`))")
 	tk.MustExec("insert into t values (\"2020-01-01\", 1)")
 	tk.MustExec("analyze table t")
 	result = tk.MustQuery("show stats_buckets").Sort()
-	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00", "test t  b 0 0 1 1 1 1", "test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)"))
+	result.Check(testkit.Rows("test t  a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00 0", "test t  b 0 0 1 1 1 1 0", "test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0"))
 	result = tk.MustQuery("show stats_buckets where column_name = 'idx'")
-	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)"))
+	result.Check(testkit.Rows("test t  idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0"))
 }
 
 func (s *testShowStatsSuite) TestShowStatsHasNullValue(c *C) {
@@ -124,14 +124,14 @@ func (s *testShowStatsSuite) TestShowStatsHasNullValue(c *C) {
 	tk.MustExec("insert into t values(1)")
 	tk.MustExec("analyze table t")
 	tk.MustQuery("show stats_buckets").Sort().Check(testkit.Rows(
-		"test t  a 0 0 1 1 1 1",
-		"test t  idx 1 0 1 1 1 1",
+		"test t  a 0 0 1 1 1 1 0",
+		"test t  idx 1 0 1 1 1 1 0",
 	))
 	tk.MustExec("drop table t")
 	tk.MustExec("create table t (a int, b int, index idx(a, b))")
 	tk.MustExec("insert into t values(NULL, NULL)")
 	tk.MustExec("analyze table t")
-	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  idx 1 0 1 1 (NULL, NULL) (NULL, NULL)"))
+	tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t  idx 1 0 1 1 (NULL, NULL) (NULL, NULL) 0"))
 
 	tk.MustExec("drop table t")
 	tk.MustExec("create table t(a int, b int, c int, index idx_b(b), index idx_c_a(c, a))")
@@ -201,7 +201,7 @@ func (s *testShowStatsSuite) TestShowPartitionStats(c *C) {
 		c.Assert(result.Rows()[2][3], Equals, "idx")
 
 		result = tk.MustQuery("show stats_buckets").Sort()
-		result.Check(testkit.Rows("test t p0 a 0 0 1 1 1 1", "test t p0 b 0 0 1 1 1 1", "test t p0 idx 1 0 1 1 1 1"))
+		result.Check(testkit.Rows("test t p0 a 0 0 1 1 1 1 0", "test t p0 b 0 0 1 1 1 1 0", "test t p0 idx 1 0 1 1 1 1 0"))
 
 		result = tk.MustQuery("show stats_healthy")
 		result.Check(testkit.Rows("test t p0 100"))
diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go
index 49e16dae4102a..fc60c85817d40 100644
--- a/planner/core/cbo_test.go
+++ b/planner/core/cbo_test.go
@@ -558,10 +558,10 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) {
 	// the `a = 5 and c = 5` will get 10, it is not consistent.
 	tk.MustQuery("explain select * from t use index(ab) where a = 5 and c = 5").
 		Check(testkit.Rows(
-			"IndexLookUp_8 7.00 root  ",
-			"├─IndexRangeScan_5(Build) 8.75 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false",
-			"└─Selection_7(Probe) 7.00 cop[tikv]  eq(test.t.c, 5)",
-			"  └─TableRowIDScan_6 8.75 cop[tikv] table:t keep order:false",
+			"IndexLookUp_8 10.00 root  ",
+			"├─IndexRangeScan_5(Build) 12.50 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false",
+			"└─Selection_7(Probe) 10.00 cop[tikv]  eq(test.t.c, 5)",
+			"  └─TableRowIDScan_6 12.50 cop[tikv] table:t keep order:false",
 		))
 }
 
diff --git a/planner/core/testdata/analyze_suite_out.json b/planner/core/testdata/analyze_suite_out.json
index 203df046cec34..32d4cb6b49e15 100644
--- a/planner/core/testdata/analyze_suite_out.json
+++ b/planner/core/testdata/analyze_suite_out.json
@@ -322,7 +322,7 @@
       "IndexReader(Index(t.e)[[-inf,10]]->StreamAgg)->StreamAgg",
       "IndexReader(Index(t.e)[[-inf,50]]->StreamAgg)->StreamAgg",
       "IndexReader(Index(t.b_c)[[NULL,+inf]]->Sel([gt(test.t.c, 1)])->HashAgg)->HashAgg",
-      "IndexLookUp(Index(t.e)[[1,1]], Table(t)->HashAgg)->HashAgg",
+      "IndexLookUp(Index(t.e)[[1,1]], Table(t))->HashAgg",
       "TableReader(Table(t)->Sel([gt(test.t.e, 1)])->HashAgg)->HashAgg",
       "IndexLookUp(Index(t.b)[[-inf,20]], Table(t)->HashAgg)->HashAgg",
       "TableReader(Table(t)->Sel([le(test.t.b, 30)])->StreamAgg)->StreamAgg",
@@ -363,20 +363,18 @@
       {
         "SQL": "explain select * from t where a = 7639902",
         "Plan": [
-          "IndexReader_6 499061.16 root  index:IndexRangeScan_5",
-          "└─IndexRangeScan_5 499061.16 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
+          "IndexReader_6 6.68 root  index:IndexRangeScan_5",
+          "└─IndexRangeScan_5 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
         ]
       },
       {
         "SQL": "explain select c, b from t where a = 7639902 order by b asc limit 6",
         "Plan": [
           "Projection_7 6.00 root  test.t.c, test.t.b",
-          "└─Limit_12 6.00 root  offset:0, count:6",
-          "  └─Projection_23 6.00 root  test.t.a, test.t.b, test.t.c",
-          "    └─IndexLookUp_22 6.00 root  ",
-          "      ├─IndexFullScan_19(Build) 600.00 cop[tikv] table:t, index:b(b) keep order:true",
-          "      └─Selection_21(Probe) 6.00 cop[tikv]  eq(test.t.a, 7639902)",
-          "        └─TableRowIDScan_20 600.00 cop[tikv] table:t keep order:false"
+          "└─TopN_8 6.00 root  test.t.b, offset:0, count:6",
+          "  └─IndexReader_16 6.00 root  index:TopN_15",
+          "    └─TopN_15 6.00 cop[tikv]  test.t.b, offset:0, count:6",
+          "      └─IndexRangeScan_14 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false"
         ]
       }
     ]
diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json
index feb86015d54fb..9243534ef3042 100644
--- a/planner/core/testdata/integration_suite_out.json
+++ b/planner/core/testdata/integration_suite_out.json
@@ -926,8 +926,8 @@
       {
         "SQL": "select * from t1 where t1.a = 1 and t1.b < \"333\"",
         "Plan": [
-          "TableReader_6 1.00 root  data:TableRangeScan_5",
-          "└─TableRangeScan_5 1.00 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false"
+          "TableReader_6 0.67 root  data:TableRangeScan_5",
+          "└─TableRangeScan_5 0.67 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false"
         ],
         "Res": [
           "1 111 1.1000000000 11"
diff --git a/statistics/feedback.go b/statistics/feedback.go
index 1f042b85d41cc..7e2e4225925ed 100644
--- a/statistics/feedback.go
+++ b/statistics/feedback.go
@@ -518,7 +518,6 @@ func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originB
 		countInNewBkt := originBucketCount * ratio
 		ndvInNewBkt := int64(float64(originalNdv) * ratio)
 		countInNewBkt, ndvInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt, ndvInNewBkt)
-		log.Warn("split bucket", zap.Float64("count", countInNewBkt), zap.Int64("ndv", ndvInNewBkt))
 		// do not split if the count of result bucket is too small.
 		if countInNewBkt < minBucketFraction*totalCount {
 			bounds[i] = bounds[i-1]
@@ -744,18 +743,20 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6
 
 // UpdateHistogram updates the histogram according buckets.
 func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram {
-	buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
-	ndvs := make([]int64, len(buckets))
-	for i := range buckets {
-		ndvs[i] = buckets[i].Ndv
+	if statsVer < Version2 {
+		// For stats versions below Version2 the bucket NDV is not maintained yet, so reset it.
+		for i := range feedback.Feedback {
+			feedback.Feedback[i].Ndv = 0
+		}
 	}
-	log.Warn("update hist", zap.Int64s("ndvs", ndvs))
+	buckets, isNewBuckets, totalCount := splitBuckets(h, feedback)
 	buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount))
 	hist := buildNewHistogram(h, buckets)
 	// Update the NDV of primary key column.
 	if feedback.Tp == PkType {
 		hist.NDV = int64(hist.TotalRowCount())
-	} else if feedback.Tp == IndexType {
+		// If the bucket NDV is maintained, we can also update the total NDV.
+	} else if feedback.Tp == IndexType && statsVer == 2 {
 		totNdv := int64(0)
 		for _, bkt := range buckets {
 			totNdv += bkt.Ndv
diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go
index 3fb665cc2b738..871c078383cd8 100644
--- a/statistics/handle/update_test.go
+++ b/statistics/handle/update_test.go
@@ -769,25 +769,25 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5 order by a desc",
 			hist: "column:1 ndv:4 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" +
-				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2\n" +
-				"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 1",
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" +
+				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n" +
+				"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
-			hist: "index:1 ndv:3\n" +
-				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" +
-				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1",
+			hist: "index:1 ndv:2\n" +
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
-			hist: "index:1 ndv:3\n" +
-				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" +
-				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1",
+			hist: "index:1 ndv:2\n" +
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0",
 			idxCols: 1,
 		},
 	}
@@ -889,22 +889,22 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5",
 			hist: "column:1 ndv:2 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" +
-				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 1",
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" +
+				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1",
+				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1",
+				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
 			idxCols: 1,
 		},
 	}
@@ -1025,8 +1025,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) {
 	h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
 	tbl := h.GetTableStats(tblInfo)
 
-	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+
-		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+
+	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
+		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+
 		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
 		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
 	sc := &stmtctx.StatementContext{TimeZone: time.Local}
@@ -1036,8 +1036,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) {
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint64(2))
 
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+
-		"num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 1\n"+
-		"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1")
+		"num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n"+
+		"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0")
 
 	// Test that it won't cause panic after update.
 	testKit.MustQuery("select * from t use index(idx) where b > 0")
@@ -1081,8 +1081,8 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
 	pid := tblInfo.Partition.Definitions[0].ID
 	tbl := h.GetPartitionStats(tblInfo, pid)
 
-	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+
-		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+
+	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
+		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+
 		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
 		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
 }
@@ -1157,21 +1157,21 @@ func (s *testStatsSuite) TestLogDetailedInfo(c *C) {
 	}{
 		{
 			sql: "select * from t where t.a <= 15",
-			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" +
-				"[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}",
+			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}" +
+				"[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}",
 		},
 		{
 			sql: "select * from t use index(idx) where t.b <= 15",
-			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" +
-				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 4}",
+			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}" +
+				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 0}",
 		},
 		{
 			sql:    "select b from t use index(idx_ba) where b = 1 and a <= 5",
-			result: "[stats-feedback] test.t, index=idx_ba, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}",
+			result: "[stats-feedback] test.t, index=idx_ba, actual=1, equality=1, expected equality=1, range=range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0}",
 		},
 		{
 			sql:    "select b from t use index(idx_bc) where b = 1 and c <= 5",
-			result: "[stats-feedback] test.t, index=idx_bc, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}",
+			result: "[stats-feedback] test.t, index=idx_bc, actual=1, equality=1, expected equality=1, range=[-inf,6], pseudo count=7",
 		},
 		{
 			sql:    "select b from t use index(idx_ba) where b = 1",
@@ -1523,9 +1523,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
 			// The real count of `a = 1` is 0.
 			sql: "select * from t where a = 1 and b < 21",
 			hist: "column:2 ndv:20 totColSize:20\n" +
-				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" +
-				"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
-				"num: 4 lower_bound: 14 upper_bound: 21 repeats: 0",
+				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" +
+				"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" +
+				"num: 4 lower_bound: 14 upper_bound: 21 repeats: 0 ndv: 0",
 			rangeID: tblInfo.Columns[1].ID,
 			idxID:   tblInfo.Indices[0].ID,
 			eqCount: 3,
@@ -1534,9 +1534,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
 			// The real count of `b > 10` is 0.
 			sql: "select * from t where a = 2 and b > 10",
 			hist: "column:2 ndv:20 totColSize:20\n" +
-				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" +
-				"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
-				"num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0",
+				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" +
+				"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" +
+				"num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0",
 			rangeID: tblInfo.Columns[1].ID,
 			idxID:   tblInfo.Indices[0].ID,
 			eqCount: 3,
@@ -1594,25 +1594,25 @@ func (s *testStatsSuite) TestFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50 or (a > 130 and a < 140)",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0",
 			colID: 1,
 		},
 		{
 			sql: "select * from t where a >= 10",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
-				"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 14",
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 0",
 			colID: 1,
 		},
 		{
 			sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))",
 			hist: "column:2 ndv:20 totColSize:30\n" +
-				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 8\n" +
-				"num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 8\n" +
-				"num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 7",
+				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" +
+				"num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 0",
 			colID: 2,
 		},
 	}
@@ -1674,9 +1674,9 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0",
 			tblName: "t",
 		},
 		{
@@ -1690,9 +1690,9 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) {
 		{
 			sql: "select * from t1 where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" +
-				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" +
-				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14",
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" +
+				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0",
 			tblName: "t1",
 		},
 		{
diff --git a/statistics/histogram.go b/statistics/histogram.go
index 8e4ba627cfb61..9b30432f0d857 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -1032,7 +1032,7 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 {
 	if idx.NDV > 0 && idx.outOfRange(val) {
 		return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount()
 	}
-	if idx.CMSketch != nil && idx.StatsVer == Version1 {
+	if idx.CMSketch != nil && idx.StatsVer < Version2 {
 		return float64(idx.QueryBytes(b))
 	}
 	// If it's version2, query the top-n first.
diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go
index cd0196501a1d4..b017fe1bcf0f8 100644
--- a/statistics/histogram_test.go
+++ b/statistics/histogram_test.go
@@ -49,11 +49,11 @@ func (s *testStatisticsSuite) TestNewHistogramBySelectivity(c *C) {
 	node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(13), HighVal: types.MakeDatums(13)})
 	node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}})
 	intColResult := `column:1 ndv:16 totColSize:0
-num: 30 lower_bound: 0 upper_bound: 2 repeats: 10
-num: 11 lower_bound: 6 upper_bound: 8 repeats: 0
-num: 30 lower_bound: 9 upper_bound: 11 repeats: 0
-num: 1 lower_bound: 12 upper_bound: 14 repeats: 0
-num: 30 lower_bound: 27 upper_bound: 29 repeats: 0`
+num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0
+num: 11 lower_bound: 6 upper_bound: 8 repeats: 0 ndv: 0
+num: 30 lower_bound: 9 upper_bound: 11 repeats: 0 ndv: 0
+num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 ndv: 0
+num: 30 lower_bound: 27 upper_bound: 29 repeats: 0 ndv: 0`
 
 	stringCol := &Column{}
 	stringCol.Histogram = *NewHistogram(2, 15, 30, 0, types.NewFieldType(mysql.TypeString), chunk.InitialCapacity, 0)
@@ -82,11 +82,11 @@ num: 30 lower_bound: 27 upper_bound: 29 repeats: 0`
 	node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ddd"), HighVal: types.MakeDatums("fff")})
 	node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ggg"), HighVal: []types.Datum{types.MaxValueDatum()}})
 	stringColResult := `column:2 ndv:9 totColSize:0
-num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0
-num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0
-num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0
-num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0
-num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0`
+num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0 ndv: 0
+num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0 ndv: 0
+num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0 ndv: 0
+num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0 ndv: 0
+num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0 ndv: 0`
 
 	newColl := coll.NewHistCollBySelectivity(sc, []*StatsNode{node, node2})
 	c.Assert(newColl.Columns[1].String(), Equals, intColResult)
@@ -110,10 +110,10 @@ num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0`
 	node3.Ranges = append(node3.Ranges, &ranger.Range{LowVal: types.MakeDatums(10), HighVal: types.MakeDatums(13)})
 
 	idxResult := `index:0 ndv:7
-num: 30 lower_bound: 0 upper_bound: 2 repeats: 10
-num: 30 lower_bound: 3 upper_bound: 5 repeats: 10
-num: 30 lower_bound: 9 upper_bound: 11 repeats: 10
-num: 30 lower_bound: 12 upper_bound: 14 repeats: 10`
+num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0
+num: 30 lower_bound: 3 upper_bound: 5 repeats: 10 ndv: 0
+num: 30 lower_bound: 9 upper_bound: 11 repeats: 10 ndv: 0
+num: 30 lower_bound: 12 upper_bound: 14 repeats: 10 ndv: 0`
 
 	newColl = coll.NewHistCollBySelectivity(sc, []*StatsNode{node3})
 	c.Assert(newColl.Indices[0].String(), Equals, idxResult)
diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go
index 837cdae704a26..b755d58699581 100644
--- a/statistics/statistics_test.go
+++ b/statistics/statistics_test.go
@@ -680,5 +680,5 @@ func (s *testStatisticsSuite) TestIndexRanges(c *C) {
 	ran[0].HighVal[0] = types.NewIntDatum(1000)
 	count, err = tbl.GetRowCountByIndexRanges(sc, 0, ran)
 	c.Assert(err, IsNil)
-	c.Assert(int(count), Equals, 3)
+	c.Assert(int(count), Equals, 0)
 }
diff --git a/statistics/testdata/stats_suite_out.json b/statistics/testdata/stats_suite_out.json
index 59ec718e6f8f2..2a9895c8cc238 100644
--- a/statistics/testdata/stats_suite_out.json
+++ b/statistics/testdata/stats_suite_out.json
@@ -60,8 +60,8 @@
     "Name": "TestDiscreteDistribution",
     "Cases": [
       [
-        "IndexReader_6 1.02 root  index:IndexRangeScan_5",
-        "└─IndexRangeScan_5 1.02 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false"
+        "IndexReader_6 0.00 root  index:IndexRangeScan_5",
+        "└─IndexRangeScan_5 0.00 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false"
       ]
     ]
   },
@@ -92,8 +92,8 @@
     "Name": "TestCollationColumnEstimate",
     "Cases": [
       [
-        "test t  a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 1",
-        "test t  a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 1"
+        "test t  a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 0",
+        "test t  a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 0"
       ],
       [
         "TableReader_7 2.00 root  data:Selection_6",
diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go
index 40cb1a8c7203a..ba81ee4b63c4c 100644
--- a/store/mockstore/unistore/cophandler/cop_handler.go
+++ b/store/mockstore/unistore/cophandler/cop_handler.go
@@ -38,8 +38,10 @@ import (
 	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/codec"
 	"github.com/pingcap/tidb/util/collate"
+	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/rowcodec"
 	"github.com/pingcap/tipb/go-tipb"
+	"go.uber.org/zap"
 )
 
 // MPPCtx is the mpp execution context
@@ -147,6 +149,9 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt
 		}
 		return nil
 	}
+	if dagReq.CollectRangeCounts != nil && *dagReq.CollectRangeCounts == true {
+		logutil.BgLogger().Warn("unistore exec", zap.Int("ndv len", len(closureExec.ndvs)))
+	}
 	return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
 }
 
diff --git a/util/testkit/testkit.go b/util/testkit/testkit.go
index a3437f3a27a97..1b52f78549678 100644
--- a/util/testkit/testkit.go
+++ b/util/testkit/testkit.go
@@ -319,7 +319,7 @@ func (tk *TestKit) ResultSetToResult(rs sqlexec.RecordSet, comment check.Comment
 // ResultSetToResultWithCtx converts sqlexec.RecordSet to testkit.Result.
 func (tk *TestKit) ResultSetToResultWithCtx(ctx context.Context, rs sqlexec.RecordSet, comment check.CommentInterface) *Result {
 	sRows, err := session.ResultSetToStringSlice(ctx, tk.Se, rs)
-	tk.c.Check(err, check.IsNil, comment)
+	tk.c.Check(errors.ErrorStack(err), check.Equals, "", comment)
 	return &Result{rows: sRows, c: tk.c, comment: comment}
 }
 

From 878b5cac44174a74f47ea2c1005124c3f30f851e Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Thu, 24 Dec 2020 00:15:09 +0800
Subject: [PATCH 8/9] fix go mod tidy

---
 go.sum                                              | 10 ++++++----
 store/mockstore/unistore/cophandler/closure_exec.go |  2 +-
 store/mockstore/unistore/cophandler/cop_handler.go  |  5 -----
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/go.sum b/go.sum
index 7fd465e28e730..d1835d3b7fea9 100644
--- a/go.sum
+++ b/go.sum
@@ -711,10 +711,6 @@ github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible
 github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM=
 github.com/pingcap/tipb v0.0.0-20190428032612-535e1abaa330/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tipb v0.0.0-20200417094153-7316d94df1ee/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
-github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217 h1:Ophn4Ud/QHp1BH0FJOzbAVBW9Mw8BlX0gtWkK7ubDy0=
-github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
-github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92 h1:+EomCEPnE5MI0HD10wyoiYj1At57midQ4TagtvV9bmY=
-github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4 h1:x64INZ8imEXO3MFcWD99lYlp52V9ZdYrxj74ynfyg3c=
 github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI=
 github.com/pingcap/tiup v1.2.3 h1:8OCQF7sHhT6VqE8pZU1JTSogPA90OFuWWM/B746x0YY=
@@ -819,6 +815,7 @@ github.com/snowflakedb/gosnowflake v1.3.4/go.mod h1:NsRq2QeiMUuoNUJhp5Q6xGC4uBrs
 github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E=
 github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
+github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
 github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
 github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
@@ -971,6 +968,7 @@ go.uber.org/automaxprocs v1.2.0 h1:+RUihKM+nmYUoB9w0D0Ov5TJ2PpFO2FgenTxMJiZBZA=
 go.uber.org/automaxprocs v1.2.0/go.mod h1:YfO3fm683kQpzETxlTGZhGIVmXAhaw3gxeBADbpZtnU=
 go.uber.org/dig v1.8.0/go.mod h1:X34SnWGr8Fyla9zQNO2GSO2D+TIuqB14OS8JhYocIyw=
 go.uber.org/fx v1.10.0/go.mod h1:vLRicqpG/qQEzno4SYU86iCwfT95EZza+Eba0ItuxqY=
+go.uber.org/goleak v0.10.0 h1:G3eWbSNIskeRqtsN/1uI5B+eP73y3JUuBsv9AZjehb4=
 go.uber.org/goleak v0.10.0/go.mod h1:VCZuO8V8mFPlL0F5J5GK1rtHV3DrFcQ1R8ryq7FK0aI=
 go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
 go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4=
@@ -1046,6 +1044,7 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -1212,6 +1211,7 @@ golang.org/x/tools v0.0.0-20200820010801-b793a1359eac h1:DugppSxw0LSF8lcjaODPJZo
 golang.org/x/tools v0.0.0-20200820010801-b793a1359eac/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
 gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
@@ -1281,6 +1281,7 @@ gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mo
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
 gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw=
 gopkg.in/cheggaaa/pb.v2 v2.0.7/go.mod h1:0CiZ1p8pvtxBlQpLXkHuUTpdJ1shm3OqCF1QugkjHL4=
@@ -1309,6 +1310,7 @@ gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3M
 gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
 gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
 gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637/go.mod h1:BHsqpu/nsuzkT5BpiH1EMZPLyqSMM8JbIavyFACoFNk=
 gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go
index 5b7bcc338c0ef..58e20d393a63c 100644
--- a/store/mockstore/unistore/cophandler/closure_exec.go
+++ b/store/mockstore/unistore/cophandler/closure_exec.go
@@ -1267,7 +1267,7 @@ func (e *indexScanProcessor) Finish() error {
 
 func (isc *idxScanCtx) checkVal(curVals [][]byte) bool {
 	for i := 0; i < isc.columnLen; i++ {
-		if bytes.Compare(isc.prevVals[i], curVals[i]) != 0 {
+		if !bytes.Equal(isc.prevVals[i], curVals[i]) {
 			return false
 		}
 	}
diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go
index ba81ee4b63c4c..40cb1a8c7203a 100644
--- a/store/mockstore/unistore/cophandler/cop_handler.go
+++ b/store/mockstore/unistore/cophandler/cop_handler.go
@@ -38,10 +38,8 @@ import (
 	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/codec"
 	"github.com/pingcap/tidb/util/collate"
-	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tidb/util/rowcodec"
 	"github.com/pingcap/tipb/go-tipb"
-	"go.uber.org/zap"
 )
 
 // MPPCtx is the mpp execution context
@@ -149,9 +147,6 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt
 		}
 		return nil
 	}
-	if dagReq.CollectRangeCounts != nil && *dagReq.CollectRangeCounts == true {
-		logutil.BgLogger().Warn("unistore exec", zap.Int("ndv len", len(closureExec.ndvs)))
-	}
 	return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime))
 }
 

From 48cc1bdf6035a8a35131c403dd5fdcd9ac6ab79c Mon Sep 17 00:00:00 2001
From: Yiding Cui <winoros@gmail.com>
Date: Wed, 30 Dec 2020 01:45:47 +0800
Subject: [PATCH 9/9] address comments

---
 statistics/histogram.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/statistics/histogram.go b/statistics/histogram.go
index 519d2096514db..7b71d0116463d 100644
--- a/statistics/histogram.go
+++ b/statistics/histogram.go
@@ -211,9 +211,7 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 {
 
 // AppendBucket appends a bucket into `hg`.
 func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) {
-	hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 0})
-	hg.Bounds.AppendDatum(0, lower)
-	hg.Bounds.AppendDatum(0, upper)
+	hg.AppendBucketWithNDV(lower, upper, count, repeat, 0)
 }
 
 // AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`.