From 425d39e0abb6d7cf0bd353746e7d5469de0834d0 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 20 Aug 2020 12:14:10 +0800 Subject: [PATCH 1/9] add bucket ndv for index histogram --- .../r/explain_complex_stats.result | 16 +-- cmd/explaintest/r/explain_easy_stats.result | 22 +-- cmd/explaintest/r/explain_join_stats.result | 16 +-- distsql/select_result.go | 3 +- distsql/stream.go | 2 +- executor/show_stats.go | 1 + go.mod | 2 +- go.sum | 4 + planner/core/cbo_test.go | 8 +- planner/core/planbuilder.go | 4 +- planner/core/testdata/analyze_suite_out.json | 16 ++- .../integration_serial_suite_out.json | 2 +- .../core/testdata/integration_suite_out.json | 4 +- session/bootstrap.go | 11 ++ session/session.go | 2 +- statistics/builder.go | 4 +- statistics/feedback.go | 74 +++++++--- statistics/feedback_test.go | 127 +++++++++++------- statistics/handle/handle.go | 6 +- statistics/handle/update.go | 18 ++- statistics/handle/update_test.go | 100 +++++++------- statistics/histogram.go | 59 +++++--- statistics/statistics_test.go | 2 +- statistics/table.go | 3 + statistics/testdata/stats_suite_out.json | 8 +- .../unistore/cophandler/closure_exec.go | 31 +++++ .../unistore/cophandler/cop_handler.go | 7 +- 27 files changed, 350 insertions(+), 202 deletions(-) diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 6aac4462e7763..15274e5cafa51 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -115,14 +115,14 @@ PRIMARY KEY (aid,dic) load stats 's/explain_complex_stats_rr.json'; explain SELECT ds, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(dic) as install_device FROM dt use index (cm) WHERE (ds >= '2016-09-01') AND (ds <= '2016-11-03') AND (cm IN ('1062', '1086', '1423', '1424', '1425', '1426', '1427', '1428', '1429', '1430', '1431', '1432', '1433', '1434', '1435', '1436', '1437', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1448', '1449', '1450', '1451', '1452', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1550', '1551', '1552', '1553', '1554', '1555', '1556', '1557', '1558', '1559', '1597', '1598', '1599', '1600', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1709', '1719', '1720', '1843', '2813', '2814', '2815', '2816', '2817', '2818', '2819', '2820', '2821', '2822', '2823', '2824', '2825', '2826', '2827', '2828', '2829', '2830', '2831', '2832', '2833', '2834', '2835', '2836', '2837', '2838', '2839', '2840', '2841', '2842', '2843', '2844', '2845', '2846', '2847', '2848', '2849', '2850', '2851', '2852', '2853', '2854', '2855', '2856', '2857', '2858', '2859', '2860', '2861', '2862', '2863', '2864', '2865', '2866', '2867', '2868', '2869', '2870', '2871', '2872', '3139', '3140', '3141', '3142', '3143', '3144', '3145', '3146', '3147', '3148', '3149', '3150', '3151', '3152', '3153', '3154', '3155', '3156', '3157', '3158', '3386', '3387', '3388', '3389', '3390', '3391', '3392', '3393', '3394', '3395', '3664', '3665', '3666', '3667', '3668', '3670', '3671', '3672', '3673', '3674', '3676', '3677', '3678', '3679', '3680', '3681', '3682', '3683', '3684', '3685', '3686', '3687', '3688', '3689', '3690', '3691', '3692', '3693', '3694', '3695', '3696', '3697', '3698', '3699', '3700', '3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714', '3715', '3960', '3961', '3962', '3963', '3964', '3965', '3966', '3967', '3968', '3978', '3979', '3980', '3981', '3982', '3983', '3984', '3985', '3986', '3987', '4208', '4209', '4210', '4211', '4212', '4304', '4305', '4306', '4307', '4308', '4866', '4867', '4868', '4869', '4870', '4871', '4872', '4873', '4874', '4875')) GROUP BY ds, p1, p2, p3, p4, p5, p6_md5, p7_md5 ORDER BY ds2 DESC; id estRows task access object operator info -Projection_7 21.53 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 -└─Sort_8 21.53 root test.dt.ds2:desc - └─HashAgg_16 21.53 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 - └─IndexLookUp_17 21.53 root - ├─IndexRangeScan_13(Build) 128.32 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false - └─HashAgg_11(Probe) 21.53 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 - └─Selection_15 21.56 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) - └─TableRowIDScan_14 128.32 cop[tikv] table:dt keep order:false +Projection_7 308.93 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 +└─Sort_8 308.93 root test.dt.ds2:desc + └─HashAgg_16 308.93 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 + └─IndexLookUp_17 308.93 root + ├─IndexRangeScan_13(Build) 1841.60 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false + └─HashAgg_11(Probe) 308.93 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 + └─Selection_15 309.39 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) + └─TableRowIDScan_14 1841.60 cop[tikv] table:dt keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext, gad.t as gtime from st gad join (select id, aid, pt, dic, ip, t from dd where pt = 'android' and bm = 0 and t > 1478143908) sdk on gad.aid = sdk.aid and gad.ip = sdk.ip and sdk.t > gad.t where gad.t > 1478143908 and gad.bm = 0 and gad.pt = 'android' group by gad.aid, sdk.dic limit 2500; id estRows task access object operator info Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index fd40354ca817e..35ee16645a364 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -42,16 +42,16 @@ TableReader_6 1999.00 root data:TableRangeScan_5 └─TableRangeScan_5 1999.00 cop[tikv] table:t1 range:(0,+inf], keep order:false explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id estRows task access object operator info -IndexReader_6 0.00 root index:IndexRangeScan_5 -└─IndexRangeScan_5 0.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false +IndexReader_6 8.00 root index:IndexRangeScan_5 +└─IndexRangeScan_5 8.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id estRows task access object operator info HashJoin_22 2481.25 root left outer join, equal:[eq(test.t1.c2, test.t2.c1)] ├─TableReader_36(Build) 1985.00 root data:Selection_35 │ └─Selection_35 1985.00 cop[tikv] not(isnull(test.t2.c1)) │ └─TableFullScan_34 1985.00 cop[tikv] table:t2 keep order:false -└─TableReader_33(Probe) 1998.00 root data:TableRangeScan_32 - └─TableRangeScan_32 1998.00 cop[tikv] table:t1 range:(1,+inf], keep order:false +└─TableReader_33(Probe) 1991.00 root data:TableRangeScan_32 + └─TableRangeScan_32 1991.00 cop[tikv] table:t1 range:(1,+inf], keep order:false explain update t1 set t1.c2 = 2 where t1.c1 = 1; id estRows task access object operator info Update_2 N/A root N/A @@ -59,9 +59,9 @@ Update_2 N/A root N/A explain delete from t1 where t1.c2 = 1; id estRows task access object operator info Delete_4 N/A root N/A -└─IndexLookUp_11 0.00 root - ├─IndexRangeScan_9(Build) 0.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false - └─TableRowIDScan_10(Probe) 0.00 cop[tikv] table:t1 keep order:false +└─IndexLookUp_11 8.00 root + ├─IndexRangeScan_9(Build) 8.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false + └─TableRowIDScan_10(Probe) 8.00 cop[tikv] table:t1 keep order:false explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id estRows task access object operator info Projection_11 1985.00 root cast(Column#8, bigint(21) BINARY)->Column#7 @@ -80,10 +80,10 @@ TopN_7 1.00 root test.t2.c2, offset:0, count:1 └─TableFullScan_13 1985.00 cop[tikv] table:t2 keep order:false explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1; id estRows task access object operator info -IndexLookUp_11 0.00 root -├─IndexRangeScan_8(Build) 0.00 cop[tikv] table:t1, index:c2(c2) range:(1 1,1 +inf], keep order:false -└─Selection_10(Probe) 0.00 cop[tikv] lt(test.t1.c3, 1) - └─TableRowIDScan_9 0.00 cop[tikv] table:t1 keep order:false +IndexLookUp_11 0.51 root +├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:t1, index:c2(c2) range:(1 1,1 +inf], keep order:false +└─Selection_10(Probe) 0.51 cop[tikv] lt(test.t1.c3, 1) + └─TableRowIDScan_9 1.00 cop[tikv] table:t1 keep order:false explain select * from t1 where c1 = 1 and c2 > 1; id estRows task access object operator info Selection_6 0.50 root gt(test.t1.c2, 1) diff --git a/cmd/explaintest/r/explain_join_stats.result b/cmd/explaintest/r/explain_join_stats.result index 73bb6e2671ba4..4686025ea6703 100644 --- a/cmd/explaintest/r/explain_join_stats.result +++ b/cmd/explaintest/r/explain_join_stats.result @@ -7,21 +7,21 @@ load stats 's/explain_join_stats_lo.json'; explain select count(*) from e, lo where lo.a=e.a and e.b=22336; id estRows task access object operator info StreamAgg_13 1.00 root funcs:count(1)->Column#5 -└─HashJoin_89 19977.00 root inner join, equal:[eq(test.lo.a, test.e.a)] +└─HashJoin_89 20044.00 root inner join, equal:[eq(test.lo.a, test.e.a)] ├─TableReader_50(Build) 250.00 root data:TableFullScan_49 │ └─TableFullScan_49 250.00 cop[tikv] table:lo keep order:false - └─IndexLookUp_61(Probe) 19977.00 root - ├─IndexRangeScan_58(Build) 19977.00 cop[tikv] table:e, index:idx_b(b) range:[22336,22336], keep order:false - └─Selection_60(Probe) 19977.00 cop[tikv] not(isnull(test.e.a)) - └─TableRowIDScan_59 19977.00 cop[tikv] table:e keep order:false + └─IndexLookUp_61(Probe) 20044.00 root + ├─IndexRangeScan_58(Build) 20044.00 cop[tikv] table:e, index:idx_b(b) range:[22336,22336], keep order:false + └─Selection_60(Probe) 20044.00 cop[tikv] not(isnull(test.e.a)) + └─TableRowIDScan_59 20044.00 cop[tikv] table:e keep order:false explain select /*+ TIDB_INLJ(e) */ count(*) from e, lo where lo.a=e.a and e.b=22336; id estRows task access object operator info StreamAgg_12 1.00 root funcs:count(1)->Column#5 -└─IndexJoin_56 19977.00 root inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a +└─IndexJoin_56 20044.00 root inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a ├─TableReader_40(Build) 250.00 root data:TableFullScan_39 │ └─TableFullScan_39 250.00 cop[tikv] table:lo keep order:false - └─IndexLookUp_55(Probe) 79.91 root + └─IndexLookUp_55(Probe) 80.18 root ├─Selection_53(Build) 4080.00 cop[tikv] not(isnull(test.e.a)) │ └─IndexRangeScan_51 4080.00 cop[tikv] table:e, index:idx_a(a) range: decided by [eq(test.e.a, test.lo.a)], keep order:false - └─Selection_54(Probe) 79.91 cop[tikv] eq(test.e.b, 22336) + └─Selection_54(Probe) 80.18 cop[tikv] eq(test.e.b, 22336) └─TableRowIDScan_52 4080.00 cop[tikv] table:e keep order:false diff --git a/distsql/select_result.go b/distsql/select_result.go index 02d7dc77acf05..f177fd3632106 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -139,7 +139,8 @@ func (r *selectResult) fetchResp(ctx context.Context) error { for _, warning := range r.selectResp.Warnings { sc.AppendWarning(terror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg)) } - r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts) + logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs)) + r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs) r.partialCount++ hasStats, ok := resultSubset.(CopRuntimeStats) diff --git a/distsql/stream.go b/distsql/stream.go index f1817084cdf44..c5618d95a03f9 100644 --- a/distsql/stream.go +++ b/distsql/stream.go @@ -104,7 +104,7 @@ func (r *streamResult) readDataFromResponse(ctx context.Context, resp kv.Respons if err != nil { return false, errors.Trace(err) } - r.feedback.Update(resultSubset.GetStartKey(), stream.OutputCounts) + r.feedback.Update(resultSubset.GetStartKey(), stream.OutputCounts, stream.Ndvs) r.partialCount++ hasStats, ok := resultSubset.(CopRuntimeStats) diff --git a/executor/show_stats.go b/executor/show_stats.go index 0c1461821528c..da05837f61dc2 100644 --- a/executor/show_stats.go +++ b/executor/show_stats.go @@ -181,6 +181,7 @@ func (e *ShowExec) bucketsToRows(dbName, tblName, partitionName, colName string, hist.Buckets[i].Repeat, lowerBoundStr, upperBoundStr, + hist.Buckets[i].NDV, }) } return nil diff --git a/go.mod b/go.mod index 1e8246c97c058..e63359ec62781 100644 --- a/go.mod +++ b/go.mod @@ -40,7 +40,7 @@ require ( github.com/pingcap/pd/v4 v4.0.0-rc.2.0.20200730093003-dc8c75cf7ca0 github.com/pingcap/sysutil v0.0.0-20200715082929-4c47bcac246a github.com/pingcap/tidb-tools v4.0.1+incompatible - github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3 + github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611 github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.9.1 diff --git a/go.sum b/go.sum index b7c779284d851..781d0774daf9f 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,10 @@ github.com/pingcap/tipb v0.0.0-20200604070248-508f03b0b342/go.mod h1:RtkHW8WbcNx github.com/pingcap/tipb v0.0.0-20200615034523-dcfcea0b5965/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3 h1:ESL3eIt1kUt8IMvR1011ejZlAyDcOzw89ARvVHvpD5k= github.com/pingcap/tipb v0.0.0-20200618092958-4fad48b4c8c3/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= +github.com/pingcap/tipb v0.0.0-20200819200035-714bd87bf361 h1:HSe6jRjauAbijvoQdJO/xOdXAY5gUYLpLO0G5HyBHzg= +github.com/pingcap/tipb v0.0.0-20200819200035-714bd87bf361/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= +github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611 h1:GVmsE4VK2NZK0v2j2xUQoba+UDMh4/da+ScIJBjd1og= +github.com/pingcap/tipb v0.0.0-20201013162506-6fc729765611/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 781952adf3e05..2c5ca439d28dc 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -552,10 +552,10 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) { // the `a = 5 and c = 5` will get 10, it is not consistent. tk.MustQuery("explain select * from t use index(ab) where a = 5 and c = 5"). Check(testkit.Rows( - "IndexLookUp_8 10.00 root ", - "├─IndexRangeScan_5(Build) 12.50 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false", - "└─Selection_7(Probe) 10.00 cop[tikv] eq(test.t.c, 5)", - " └─TableRowIDScan_6 12.50 cop[tikv] table:t keep order:false", + "IndexLookUp_8 7.00 root ", + "├─IndexRangeScan_5(Build) 8.75 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false", + "└─Selection_7(Probe) 7.00 cop[tikv] eq(test.t.c, 5)", + " └─TableRowIDScan_6 8.75 cop[tikv] table:t keep order:false", )) } diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 7e7ab31489c0e..2dbd09dbac545 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -3316,9 +3316,9 @@ func buildShowSchema(s *ast.ShowStmt, isView bool, isSequence bool) (schema *exp mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeDouble, mysql.TypeDouble} case ast.ShowStatsBuckets: names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Bucket_id", "Count", - "Repeats", "Lower_Bound", "Upper_Bound"} + "Repeats", "Lower_Bound", "Upper_Bound", "Ndv"} ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeLonglong, - mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar} + mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong} case ast.ShowStatsHealthy: names = []string{"Db_name", "Table_name", "Partition_name", "Healthy"} ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong} diff --git a/planner/core/testdata/analyze_suite_out.json b/planner/core/testdata/analyze_suite_out.json index 001454fbce6b5..02700cacf886e 100644 --- a/planner/core/testdata/analyze_suite_out.json +++ b/planner/core/testdata/analyze_suite_out.json @@ -306,7 +306,7 @@ "IndexReader(Index(t.e)[[-inf,10]]->StreamAgg)->StreamAgg", "IndexReader(Index(t.e)[[-inf,50]]->StreamAgg)->StreamAgg", "IndexReader(Index(t.b_c)[[NULL,+inf]]->Sel([gt(test.t.c, 1)])->HashAgg)->HashAgg", - "IndexLookUp(Index(t.e)[[1,1]], Table(t))->HashAgg", + "IndexLookUp(Index(t.e)[[1,1]], Table(t)->HashAgg)->HashAgg", "TableReader(Table(t)->Sel([gt(test.t.e, 1)])->HashAgg)->HashAgg", "IndexLookUp(Index(t.b)[[-inf,20]], Table(t)->HashAgg)->HashAgg", "TableReader(Table(t)->Sel([le(test.t.b, 30)])->StreamAgg)->StreamAgg", @@ -347,18 +347,20 @@ { "SQL": "explain select * from t where a = 7639902", "Plan": [ - "IndexReader_6 6.68 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" + "IndexReader_6 499061.16 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 499061.16 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" ] }, { "SQL": "explain select c, b from t where a = 7639902 order by b asc limit 6", "Plan": [ "Projection_7 6.00 root test.t.c, test.t.b", - "└─TopN_8 6.00 root test.t.b, offset:0, count:6", - " └─IndexReader_16 6.00 root index:TopN_15", - " └─TopN_15 6.00 cop[tikv] test.t.b, offset:0, count:6", - " └─IndexRangeScan_14 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" + "└─Limit_12 6.00 root offset:0, count:6", + " └─Projection_23 6.00 root test.t.a, test.t.b, test.t.c", + " └─IndexLookUp_22 6.00 root ", + " ├─IndexFullScan_19(Build) 600.00 cop[tikv] table:t, index:b(b) keep order:true", + " └─Selection_21(Probe) 6.00 cop[tikv] eq(test.t.a, 7639902)", + " └─TableRowIDScan_20 600.00 cop[tikv] table:t keep order:false" ] } ] diff --git a/planner/core/testdata/integration_serial_suite_out.json b/planner/core/testdata/integration_serial_suite_out.json index b3be7bdeeab4f..6aee43d9d27e1 100644 --- a/planner/core/testdata/integration_serial_suite_out.json +++ b/planner/core/testdata/integration_serial_suite_out.json @@ -29,7 +29,7 @@ "StreamAgg_32 1.00 root funcs:count(Column#14)->Column#11", "└─TableReader_33 1.00 root data:StreamAgg_13", " └─StreamAgg_13 1.00 cop[tiflash] funcs:count(1)->Column#14", - " └─BroadcastJoin_31 8.00 cop[tiflash] inner join, left key:test.fact_t.d1_k, right key:test.d1_t.d1_k", + " └─BroadcastJoin_31 8.00 cop[tiflash] inner join, left key:test.fact_t.d1_k, right key:test.d1_t.d1_k", " ├─Selection_23(Build) 2.00 cop[tiflash] not(isnull(test.d1_t.d1_k))", " │ └─TableFullScan_22 2.00 cop[tiflash] table:d1_t keep order:false, global read", " └─Selection_21(Probe) 8.00 cop[tiflash] not(isnull(test.fact_t.d1_k))", diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index 7480a3dccd376..fedaa72a6b84c 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -857,8 +857,8 @@ { "SQL": "select * from t1 where t1.a = 1 and t1.b < \"333\"", "Plan": [ - "TableReader_6 0.67 root data:TableRangeScan_5", - "└─TableRangeScan_5 0.67 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false" + "TableReader_6 1.00 root data:TableRangeScan_5", + "└─TableRangeScan_5 1.00 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false" ], "Res": [ "1 111 1.1000000000 11" diff --git a/session/bootstrap.go b/session/bootstrap.go index 95dfff6586123..af7b42c8ffd51 100644 --- a/session/bootstrap.go +++ b/session/bootstrap.go @@ -198,6 +198,7 @@ const ( repeats bigint(64) NOT NULL, upper_bound blob NOT NULL, lower_bound blob , + ndv bigint NOT NULL DEFAULT 0, unique index tbl(table_id, is_index, hist_id, bucket_id) );` @@ -423,6 +424,8 @@ const ( version49 = 49 // version50 add mysql.schema_index_usage table. version50 = 50 + // version51 add column ndv for mysql.stats_buckets. + version51 = 51 ) var ( @@ -476,6 +479,7 @@ var ( upgradeToVer48, upgradeToVer49, upgradeToVer50, + upgradeToVer51, } ) @@ -1170,6 +1174,13 @@ func upgradeToVer50(s Session, ver int64) { doReentrantDDL(s, CreateSchemaIndexUsageTable) } +func upgradeToVer51(s Session, ver int64) { + if ver >= version51 { + return + } + doReentrantDDL(s, "ALTER TABLE mysql.stats_buckets ADD COLUMN `ndv` bigint not null default 0", infoschema.ErrColumnExists) +} + // updateBootstrapVer updates bootstrap version variable in mysql.TiDB table. func updateBootstrapVer(s Session) { // Update bootstrap version. diff --git a/session/session.go b/session/session.go index 440d9b684066d..01b5b118ae053 100644 --- a/session/session.go +++ b/session/session.go @@ -1948,7 +1948,7 @@ func CreateSessionWithDomain(store kv.Storage, dom *domain.Domain) (*session, er const ( notBootstrapped = 0 - currentBootstrapVersion = version50 + currentBootstrapVersion = version51 ) func getStoreBootstrapVersion(store kv.Storage) int64 { diff --git a/statistics/builder.go b/statistics/builder.go index 45ae0a5678c3f..8168c64acfb24 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -50,7 +50,7 @@ func (b *SortedBuilder) Hist() *Histogram { func (b *SortedBuilder) Iterate(data types.Datum) error { b.Count++ if b.Count == 1 { - b.hist.AppendBucket(&data, &data, 1, 1) + b.hist.AppendBucketWithNDV(&data, &data, 1, 1, 1) b.hist.NDV = 1 return nil } @@ -86,7 +86,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { } else { b.lastNumber = b.hist.Buckets[b.bucketIdx].Count b.bucketIdx++ - b.hist.AppendBucket(&data, &data, b.lastNumber+1, 1) + b.hist.AppendBucketWithNDV(&data, &data, b.lastNumber+1, 1, 1) } b.hist.NDV++ } diff --git a/statistics/feedback.go b/statistics/feedback.go index 545d5875049f0..2ab6724ca6c33 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -44,6 +44,7 @@ type Feedback struct { Upper *types.Datum Count int64 Repeat int64 + Ndv int64 } // QueryFeedback is used to represent the query feedback info. It contains the query's scan ranges and number of rows @@ -236,7 +237,7 @@ func (q *QueryFeedback) DecodeIntValues() *QueryFeedback { func (q *QueryFeedback) StoreRanges(ranges []*ranger.Range) { q.Feedback = make([]Feedback, 0, len(ranges)) for _, ran := range ranges { - q.Feedback = append(q.Feedback, Feedback{&ran.LowVal[0], &ran.HighVal[0], 0, 0}) + q.Feedback = append(q.Feedback, Feedback{&ran.LowVal[0], &ran.HighVal[0], 0, 0, 0}) } } @@ -258,7 +259,7 @@ func (q *QueryFeedback) Actual() int64 { // Update updates the query feedback. `startKey` is the start scan key of the partial result, used to find // the range for update. `counts` is the scan counts of each range, used to update the feedback count info. -func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) { +func (q *QueryFeedback) Update(startKey kv.Key, counts, ndvs []int64) { // Older versions do not have the counts info. if len(counts) == 0 { q.Invalidate() @@ -292,6 +293,7 @@ func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) { for i := 0; i < len(counts)/2; i++ { j := len(counts) - i - 1 counts[i], counts[j] = counts[j], counts[i] + ndvs[i], ndvs[j] = ndvs[j], ndvs[i] } } // Update the feedback count info. @@ -301,6 +303,7 @@ func (q *QueryFeedback) Update(startKey kv.Key, counts []int64) { break } q.Feedback[i+idx].Count += count + q.Feedback[i+idx].Ndv += ndvs[i] } } @@ -503,23 +506,26 @@ type bucket = Feedback // calculates the count for each new bucket, merge the new bucket whose count // is smaller than "minBucketFraction*totalCount" with the next new bucket // until the last new bucket. -func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originBucketCount float64) []bucket { +func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originBucketCount float64, originalNdv int64) []bucket { // Split the bucket. bounds := b.getBoundaries(newNumBkts + 1) bkts := make([]bucket, 0, len(bounds)-1) sc := &stmtctx.StatementContext{TimeZone: time.UTC} for i := 1; i < len(bounds); i++ { - newBkt := bucket{&bounds[i-1], bounds[i].Clone(), 0, 0} + newBkt := bucket{&bounds[i-1], bounds[i].Clone(), 0, 0, 0} // get bucket count - _, ratio := getOverlapFraction(Feedback{b.lower, b.upper, int64(originBucketCount), 0}, newBkt) + _, ratio := getOverlapFraction(Feedback{b.lower, b.upper, int64(originBucketCount), 0, 0}, newBkt) countInNewBkt := originBucketCount * ratio - countInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt) + ndvInNewBkt := int64(float64(originalNdv) * ratio) + countInNewBkt, ndvInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt, ndvInNewBkt) + log.Warn("split bucket", zap.Float64("count", countInNewBkt), zap.Int64("ndv", ndvInNewBkt)) // do not split if the count of result bucket is too small. if countInNewBkt < minBucketFraction*totalCount { bounds[i] = bounds[i-1] continue } newBkt.Count = int64(countInNewBkt) + newBkt.Ndv = ndvInNewBkt bkts = append(bkts, newBkt) // To guarantee that each bucket's range will not overlap. setNextValue(&bounds[i]) @@ -556,45 +562,51 @@ func getOverlapFraction(fb Feedback, bkt bucket) (float64, float64) { } // mergeFullyContainedFeedback merges the max fraction of non-overlapped feedbacks that are fully contained in the bucket. -func (b *BucketFeedback) mergeFullyContainedFeedback(sc *stmtctx.StatementContext, bkt bucket) (float64, float64, bool) { +func (b *BucketFeedback) mergeFullyContainedFeedback(sc *stmtctx.StatementContext, bkt bucket) (float64, float64, int64, bool) { feedbacks := make([]Feedback, 0, len(b.feedback)) // Get all the fully contained feedbacks. for _, fb := range b.feedback { res, err := outOfRange(sc, bkt.Lower, bkt.Upper, fb.Lower) if res != 0 || err != nil { - return 0, 0, false + return 0, 0, 0, false } res, err = outOfRange(sc, bkt.Lower, bkt.Upper, fb.Upper) if res != 0 || err != nil { - return 0, 0, false + return 0, 0, 0, false } feedbacks = append(feedbacks, fb) } if len(feedbacks) == 0 { - return 0, 0, false + return 0, 0, 0, false } sortedFBs, ok := NonOverlappedFeedbacks(sc, feedbacks) if !ok { - return 0, 0, false + return 0, 0, 0, false } - var sumFraction, sumCount float64 + var ( + sumFraction, sumCount float64 + ndv int64 + ) for _, fb := range sortedFBs { fraction, _ := getOverlapFraction(fb, bkt) sumFraction += fraction sumCount += float64(fb.Count) + ndv += fb.Ndv } - return sumFraction, sumCount, true + return sumFraction, sumCount, ndv, true } // refineBucketCount refine the newly split bucket count. It uses the feedback that overlaps most // with the bucket to get the bucket count. -func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt bucket, defaultCount float64) float64 { +func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt bucket, defaultCount float64, defaultNdv int64) (float64, int64) { bestFraction := minBucketFraction count := defaultCount - sumFraction, sumCount, ok := b.mergeFullyContainedFeedback(sc, bkt) + ndv := defaultNdv + sumFraction, sumCount, sumNdv, ok := b.mergeFullyContainedFeedback(sc, bkt) if ok && sumFraction > bestFraction { bestFraction = sumFraction count = sumCount / sumFraction + ndv = int64(float64(sumNdv) / sumFraction) } for _, fb := range b.feedback { fraction, ratio := getOverlapFraction(fb, bkt) @@ -602,9 +614,10 @@ func (b *BucketFeedback) refineBucketCount(sc *stmtctx.StatementContext, bkt buc if fraction > bestFraction { bestFraction = fraction count = float64(fb.Count) * ratio + ndv = int64(float64(fb.Ndv) * ratio) } } - return count + return count, ndv } const ( @@ -685,6 +698,7 @@ func mergeBuckets(bkts []bucket, isNewBuckets []bool, totalCount float64) []buck bkts[bktCursor-1].Upper = bkts[i].Upper bkts[bktCursor-1].Count += bkts[i].Count bkts[bktCursor-1].Repeat = bkts[i].Repeat + bkts[bktCursor-1].Ndv += bkts[i].Ndv idCursor++ } else { bkts[bktCursor] = bkts[i] @@ -705,13 +719,13 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6 bktFB, ok := bktID2FB[i] // No feedback, just use the original one. if !ok { - buckets = append(buckets, bucket{h.GetLower(i), h.GetUpper(i), h.bucketCount(i), h.Buckets[i].Repeat}) + buckets = append(buckets, bucket{h.GetLower(i), h.GetUpper(i), h.bucketCount(i), h.Buckets[i].Repeat, h.Buckets[i].NDV}) isNewBuckets = append(isNewBuckets, false) continue } // Distribute the total split count to bucket based on number of bucket feedback. newBktNums := splitCount * len(bktFB.feedback) / numTotalFBs - bkts := bktFB.splitBucket(newBktNums, h.TotalRowCount(), float64(h.bucketCount(i))) + bkts := bktFB.splitBucket(newBktNums, h.TotalRowCount(), float64(h.bucketCount(i)), h.Buckets[i].NDV) buckets = append(buckets, bkts...) if len(bkts) == 1 { isNewBuckets = append(isNewBuckets, false) @@ -731,11 +745,22 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6 // UpdateHistogram updates the histogram according buckets. func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram { buckets, isNewBuckets, totalCount := splitBuckets(h, feedback) + ndvs := make([]int64, len(buckets)) + for i := range buckets { + ndvs[i] = buckets[i].Ndv + } + log.Warn("update hist", zap.Int64s("ndvs", ndvs)) buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount)) hist := buildNewHistogram(h, buckets) // Update the NDV of primary key column. if feedback.Tp == PkType { hist.NDV = int64(hist.TotalRowCount()) + } else if feedback.Tp == IndexType { + totNdv := int64(0) + for _, bkt := range buckets { + totNdv += bkt.Ndv + } + hist.NDV = totNdv } return hist } @@ -756,7 +781,7 @@ func buildNewHistogram(h *Histogram, buckets []bucket) *Histogram { hist := NewHistogram(h.ID, h.NDV, h.NullCount, h.LastUpdateVersion, h.Tp, len(buckets), h.TotColSize) preCount := int64(0) for _, bkt := range buckets { - hist.AppendBucket(bkt.Lower, bkt.Upper, bkt.Count+preCount, bkt.Repeat) + hist.AppendBucketWithNDV(bkt.Lower, bkt.Upper, bkt.Count+preCount, bkt.Repeat, bkt.Ndv) preCount += bkt.Count } return hist @@ -775,6 +800,8 @@ type queryFeedback struct { // After that, it stores the Ranges for `HashValues`. Counts []int64 ColumnRanges [][]byte + + Ndvs []int64 } func encodePKFeedback(q *QueryFeedback) (*queryFeedback, error) { @@ -794,6 +821,7 @@ func encodePKFeedback(q *QueryFeedback) (*queryFeedback, error) { } pb.IntRanges = append(pb.IntRanges, low, high) pb.Counts = append(pb.Counts, fb.Count) + pb.Ndvs = append(pb.Ndvs, fb.Ndv) } return pb, nil } @@ -805,9 +833,11 @@ func encodeIndexFeedback(q *QueryFeedback) *queryFeedback { if bytes.Compare(kv.Key(fb.Lower.GetBytes()).PrefixNext(), fb.Upper.GetBytes()) >= 0 { pb.IndexPoints = append(pb.IndexPoints, fb.Lower.GetBytes()) pointCounts = append(pointCounts, fb.Count) + pb.Ndvs = append(pb.Ndvs, fb.Ndv) } else { pb.IndexRanges = append(pb.IndexRanges, fb.Lower.GetBytes(), fb.Upper.GetBytes()) pb.Counts = append(pb.Counts, fb.Count) + pb.Ndvs = append(pb.Ndvs, fb.Ndv) } } pb.Counts = append(pb.Counts, pointCounts...) @@ -858,7 +888,7 @@ func decodeFeedbackForIndex(q *QueryFeedback, pb *queryFeedback, c *CMSketch) { // decode the index range feedback for i := 0; i < len(pb.IndexRanges); i += 2 { lower, upper := types.NewBytesDatum(pb.IndexRanges[i]), types.NewBytesDatum(pb.IndexRanges[i+1]) - q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0}) + q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0, pb.Ndvs[i/2]}) } if c != nil { // decode the index point feedback, just set value count in CM Sketch @@ -891,7 +921,7 @@ func decodeFeedbackForPK(q *QueryFeedback, pb *queryFeedback, isUnsigned bool) { lower.SetInt64(pb.IntRanges[i]) upper.SetInt64(pb.IntRanges[i+1]) } - q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0}) + q.Feedback = append(q.Feedback, Feedback{&lower, &upper, pb.Counts[i/2], 0, pb.Ndvs[i/2]}) } } @@ -930,7 +960,7 @@ func decodeFeedbackForColumn(q *QueryFeedback, pb *queryFeedback, ft *types.Fiel if err != nil { return err } - q.Feedback = append(q.Feedback, Feedback{&low[0], &high[0], pb.Counts[i/2], 0}) + q.Feedback = append(q.Feedback, Feedback{&low[0], &high[0], pb.Counts[i/2], 0, 0}) } return nil } diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go index 1d778660171ba..f42f7f57ca730 100644 --- a/statistics/feedback_test.go +++ b/statistics/feedback_test.go @@ -17,9 +17,11 @@ import ( "bytes" . "github.com/pingcap/check" + "github.com/pingcap/log" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/codec" + "go.uber.org/zap" ) var _ = Suite(&testFeedbackSuite{}) @@ -27,22 +29,22 @@ var _ = Suite(&testFeedbackSuite{}) type testFeedbackSuite struct { } -func newFeedback(lower, upper, count int64) Feedback { +func newFeedback(lower, upper, count, ndv int64) Feedback { low, upp := types.NewIntDatum(lower), types.NewIntDatum(upper) - return Feedback{&low, &upp, count, 0} + return Feedback{&low, &upp, count, 0, ndv} } func genFeedbacks(lower, upper int64) []Feedback { var feedbacks []Feedback for i := lower; i < upper; i++ { - feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1)) + feedbacks = append(feedbacks, newFeedback(i, upper, upper-i+1, upper-i+1)) } return feedbacks } func appendBucket(h *Histogram, l, r int64) { lower, upper := types.NewIntDatum(l), types.NewIntDatum(r) - h.AppendBucket(&lower, &upper, 0, 0) + h.AppendBucketWithNDV(&lower, &upper, 0, 0, 0) } func genHistogram() *Histogram { @@ -57,11 +59,11 @@ func genHistogram() *Histogram { func (s *testFeedbackSuite) TestUpdateHistogram(c *C) { feedbacks := []Feedback{ - newFeedback(0, 1, 10000), - newFeedback(1, 2, 1), - newFeedback(2, 3, 3), - newFeedback(4, 5, 2), - newFeedback(5, 7, 4), + newFeedback(0, 1, 10000, 1), + newFeedback(1, 2, 1, 1), + newFeedback(2, 3, 3, 1), + newFeedback(4, 5, 2, 1), + newFeedback(5, 7, 4, 1), } feedbacks = append(feedbacks, genFeedbacks(8, 20)...) feedbacks = append(feedbacks, genFeedbacks(21, 60)...) @@ -73,50 +75,64 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) { defer func() { defaultBucketCount = originBucketCount }() c.Assert(UpdateHistogram(q.Hist, q).ToString(0), Equals, "column:0 ndv:10053 totColSize:0\n"+ - "num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+ - "num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+ - "num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ - "num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+ - "num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+ - "num: 11 lower_bound: 50 upper_bound: 60 repeats: 0") + "num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+ + "num: 7 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 2\n"+ + "num: 4 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 1\n"+ + "num: 11 lower_bound: 10 upper_bound: 20 repeats: 0 ndv: 11\n"+ + "num: 19 lower_bound: 30 upper_bound: 49 repeats: 0 ndv: 19\n"+ + "num: 11 lower_bound: 50 upper_bound: 60 repeats: 0 ndv: 11") } func (s *testFeedbackSuite) TestSplitBuckets(c *C) { // test bucket split - feedbacks := []Feedback{newFeedback(0, 1, 1)} + feedbacks := []Feedback{newFeedback(0, 1, 1, 1)} for i := 0; i < 100; i++ { - feedbacks = append(feedbacks, newFeedback(10, 15, 5)) + feedbacks = append(feedbacks, newFeedback(10, 15, 5, 5)) } q := NewQueryFeedback(0, genHistogram(), 0, false) q.Feedback = feedbacks + oldCnts := make([]int64, q.Hist.Len()) + for i := range q.Hist.Buckets { + oldCnts[i] = q.Hist.bucketCount(i) + } + oldNdvs := make([]int64, q.Hist.Len()) + for i := range q.Hist.Buckets { + oldNdvs[i] = q.Hist.Buckets[i].NDV + } + log.Warn("in test", zap.Int64s("ndvs", oldNdvs), zap.Int64s("cnts", oldCnts)) buckets, isNewBuckets, totalCount := splitBuckets(q.Hist, q) + ndvs := make([]int64, len(buckets)) + for i := range buckets { + ndvs[i] = buckets[i].Ndv + } + log.Warn("in test", zap.Int64s("ndvs", ndvs)) c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ - "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ - "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ - "num: 5 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ - "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ - "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") + "num: 1 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+ + "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+ + "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+ + "num: 5 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 5\n"+ + "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+ + "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0") c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) c.Assert(totalCount, Equals, int64(6)) // test do not split if the bucket count is too small - feedbacks = []Feedback{newFeedback(0, 1, 100000)} + feedbacks = []Feedback{newFeedback(0, 1, 100000, 1)} for i := 0; i < 100; i++ { - feedbacks = append(feedbacks, newFeedback(10, 15, 1)) + feedbacks = append(feedbacks, newFeedback(10, 15, 1, 1)) } q = NewQueryFeedback(0, genHistogram(), 0, false) q.Feedback = feedbacks buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+ - "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0\n"+ - "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0\n"+ - "num: 1 lower_bound: 10 upper_bound: 15 repeats: 0\n"+ - "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0\n"+ - "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0") + "num: 100000 lower_bound: 0 upper_bound: 1 repeats: 0 ndv: 1\n"+ + "num: 0 lower_bound: 2 upper_bound: 3 repeats: 0 ndv: 0\n"+ + "num: 0 lower_bound: 5 upper_bound: 7 repeats: 0 ndv: 0\n"+ + "num: 1 lower_bound: 10 upper_bound: 15 repeats: 0 ndv: 1\n"+ + "num: 0 lower_bound: 16 upper_bound: 20 repeats: 0 ndv: 0\n"+ + "num: 0 lower_bound: 30 upper_bound: 50 repeats: 0 ndv: 0") c.Assert(isNewBuckets, DeepEquals, []bool{false, false, false, true, true, false}) c.Assert(totalCount, Equals, int64(100001)) @@ -124,16 +140,17 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0) appendBucket(h, 0, 1000000) h.Buckets[0].Count = 1000000 + h.Buckets[0].NDV = 1000000 feedbacks = feedbacks[:0] for i := 0; i < 100; i++ { - feedbacks = append(feedbacks, newFeedback(0, 10, 1)) + feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1)) } q = NewQueryFeedback(0, h, 0, false) q.Feedback = feedbacks buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0") + "num: 1000000 lower_bound: 0 upper_bound: 1000000 repeats: 0 ndv: 1000000") c.Assert(isNewBuckets, DeepEquals, []bool{false}) c.Assert(totalCount, Equals, int64(1000000)) @@ -142,15 +159,15 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { appendBucket(h, 0, 1000000) feedbacks = feedbacks[:0] for i := 0; i < 100; i++ { - feedbacks = append(feedbacks, newFeedback(0, 10, 1)) + feedbacks = append(feedbacks, newFeedback(0, 10, 1, 1)) } q = NewQueryFeedback(0, h, 0, false) q.Feedback = feedbacks buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 1 lower_bound: 0 upper_bound: 10 repeats: 0\n"+ - "num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0") + "num: 1 lower_bound: 0 upper_bound: 10 repeats: 0 ndv: 1\n"+ + "num: 0 lower_bound: 11 upper_bound: 1000000 repeats: 0 ndv: 0") c.Assert(isNewBuckets, DeepEquals, []bool{true, true}) c.Assert(totalCount, Equals, int64(1)) @@ -158,14 +175,14 @@ func (s *testFeedbackSuite) TestSplitBuckets(c *C) { h = NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 5, 0) appendBucket(h, 0, 10000) feedbacks = feedbacks[:0] - feedbacks = append(feedbacks, newFeedback(0, 4000, 4000)) - feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000)) + feedbacks = append(feedbacks, newFeedback(0, 4000, 4000, 4000)) + feedbacks = append(feedbacks, newFeedback(4001, 9999, 1000, 1000)) q = NewQueryFeedback(0, h, 0, false) q.Feedback = feedbacks buckets, isNewBuckets, totalCount = splitBuckets(q.Hist, q) c.Assert(buildNewHistogram(q.Hist, buckets).ToString(0), Equals, "column:0 ndv:0 totColSize:0\n"+ - "num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0") + "num: 5001 lower_bound: 0 upper_bound: 10000 repeats: 0 ndv: 5001") c.Assert(isNewBuckets, DeepEquals, []bool{false}) c.Assert(totalCount, Equals, int64(5001)) } @@ -176,6 +193,7 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) { tests := []struct { points []int64 counts []int64 + ndvs []int64 isNewBuckets []bool bucketCount int result string @@ -183,37 +201,43 @@ func (s *testFeedbackSuite) TestMergeBuckets(c *C) { { points: []int64{1, 2}, counts: []int64{1}, + ndvs: []int64{1}, isNewBuckets: []bool{false}, bucketCount: 1, - result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0", + result: "column:0 ndv:0 totColSize:0\nnum: 1 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1", }, { points: []int64{1, 2, 2, 3, 3, 4}, counts: []int64{100000, 1, 1}, + ndvs: []int64{1, 1, 1}, isNewBuckets: []bool{false, false, false}, bucketCount: 2, result: "column:0 ndv:0 totColSize:0\n" + - "num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0\n" + - "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0", + "num: 100000 lower_bound: 1 upper_bound: 2 repeats: 0 ndv: 1\n" + + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2", }, // test do not Merge if the result bucket count is too large { points: []int64{1, 2, 2, 3, 3, 4, 4, 5}, counts: []int64{1, 1, 100000, 100000}, + ndvs: []int64{1, 1, 1, 1}, isNewBuckets: []bool{false, false, false, false}, bucketCount: 3, result: "column:0 ndv:0 totColSize:0\n" + - "num: 2 lower_bound: 1 upper_bound: 3 repeats: 0\n" + - "num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0\n" + - "num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0", + "num: 2 lower_bound: 1 upper_bound: 3 repeats: 0 ndv: 2\n" + + "num: 100000 lower_bound: 3 upper_bound: 4 repeats: 0 ndv: 1\n" + + "num: 100000 lower_bound: 4 upper_bound: 5 repeats: 0 ndv: 1", }, } for _, t := range tests { + if len(t.counts) != len(t.ndvs) { + c.Assert(false, IsTrue) + } bkts := make([]bucket, 0, len(t.counts)) totalCount := int64(0) for i := 0; i < len(t.counts); i++ { lower, upper := types.NewIntDatum(t.points[2*i]), types.NewIntDatum(t.points[2*i+1]) - bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0}) + bkts = append(bkts, bucket{&lower, &upper, t.counts[i], 0, t.ndvs[i]}) totalCount += t.counts[i] } defaultBucketCount = t.bucketCount @@ -232,8 +256,8 @@ func encodeInt(v int64) *types.Datum { func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) { hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLong), 0, 0) q := &QueryFeedback{Hist: hist, Tp: PkType} - q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0}) - q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0}) + q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1}) + q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(5), 1, 0, 1}) val, err := EncodeFeedback(q) c.Assert(err, IsNil) rq := &QueryFeedback{} @@ -246,8 +270,8 @@ func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) { hist.Tp = types.NewFieldType(mysql.TypeBlob) q = &QueryFeedback{Hist: hist} - q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0}) - q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0}) + q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(3), 1, 0, 1}) + q.Feedback = append(q.Feedback, Feedback{encodeInt(0), encodeInt(1), 1, 0, 1}) val, err = EncodeFeedback(q) c.Assert(err, IsNil) rq = &QueryFeedback{} @@ -268,6 +292,9 @@ func (q *QueryFeedback) Equal(rq *QueryFeedback) bool { if fb.Count != rfb.Count { return false } + if fb.Ndv != rfb.Ndv { + return false + } if fb.Lower.Kind() == types.KindInt64 { if fb.Lower.GetInt64() != rfb.Lower.GetInt64() { return false diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 5fc609d82c488..fd43de6188ef4 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -696,7 +696,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg if err != nil { return } - sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes())) + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound, ndv) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X', %d)", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes(), hg.Buckets[i].NDV)) } if isAnalyzed == 1 && len(lastAnalyzePos) > 0 { sqls = append(sqls, fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID)) @@ -729,7 +729,7 @@ func (h *Handle) SaveMetaToStorage(tableID, count, modifyCount int64) (err error } func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totColSize int64, corr float64) (_ *statistics.Histogram, err error) { - selSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d order by bucket_id", tableID, isIndex, colID) + selSQL := fmt.Sprintf("select count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d order by bucket_id", tableID, isIndex, colID) rows, fields, err := reader.read(selSQL) if err != nil { return nil, errors.Trace(err) @@ -759,7 +759,7 @@ func (h *Handle) histogramFromStorage(reader *statsReader, tableID int64, colID } } totalCount += count - hg.AppendBucket(&lowerBound, &upperBound, totalCount, repeats) + hg.AppendBucketWithNDV(&lowerBound, &upperBound, totalCount, repeats, rows[i].GetInt64(4)) } hg.PreCalculateScalar() return hg, nil diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 368a3c6adc682..5d0de1ea7079e 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -161,9 +161,9 @@ func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSi var ( // MinLogScanCount is the minimum scan count for a feedback to be logged. - MinLogScanCount = int64(1000) + MinLogScanCount = int64(1) // MinLogErrorRate is the minimum error rate for a feedback to be logged. - MinLogErrorRate = 0.5 + MinLogErrorRate = 0.0 ) // StoreQueryFeedback merges the feedback into stats collector. @@ -383,8 +383,17 @@ func (h *Handle) DumpStatsFeedbackToKV() error { err = h.DumpFeedbackToKV(fb) } else { t, ok := h.statsCache.Load().(statsCache).tables[fb.PhysicalID] - if ok { + if !ok { + continue + } + idx, ok := t.Indices[fb.Hist.ID] + if !ok { + continue + } + if idx.StatsVer == statistics.Version1 { err = h.DumpFeedbackForIndex(fb, t) + } else { + err = h.DumpFeedbackToKV(fb) } } if err != nil { @@ -427,6 +436,7 @@ func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error { // feedback locally on this tidb-server, so it could be used more timely. func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { h.sweepList() + logutil.BgLogger().Warn("local feedback update") for _, fbs := range h.feedback.Feedbacks { for _, fb := range fbs { h.mu.Lock() @@ -442,6 +452,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { if !ok || idx.Histogram.Len() == 0 { continue } + logutil.BgLogger().Warn("local feedback update index") newIdx := *idx eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback) newIdx.CMSketch = statistics.UpdateCMSketch(idx.CMSketch, eqFB) @@ -1068,6 +1079,7 @@ func (h *Handle) DumpFeedbackForIndex(q *statistics.QueryFeedback, t *statistics return errors.Trace(h.DumpFeedbackToKV(q)) } + // minAdjustFactor is the minimum adjust factor of each index feedback. // We use it to avoid adjusting too much when the assumption of independence failed. const minAdjustFactor = 0.7 diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index 35744b998d490..9b9bacfb7b8ef 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -760,25 +760,25 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) { // test primary key feedback sql: "select * from t where t.a <= 5 order by a desc", hist: "column:1 ndv:4 totColSize:0\n" + - "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" + - "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n" + - "num: 1 lower_bound: 4 upper_bound: 4 repeats: 1", + "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" + + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2\n" + + "num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 1", idxCols: 0, }, { // test index feedback by double read sql: "select * from t use index(idx) where t.b <= 5", - hist: "index:1 ndv:2\n" + - "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" + - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1", + hist: "index:1 ndv:3\n" + + "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" + + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1", idxCols: 1, }, { // test index feedback by single read sql: "select b from t use index(idx) where t.b <= 5", - hist: "index:1 ndv:2\n" + - "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" + - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1", + hist: "index:1 ndv:3\n" + + "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" + + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1", idxCols: 1, }, } @@ -880,22 +880,22 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) { // test primary key feedback sql: "select * from t where t.a <= 5", hist: "column:1 ndv:2 totColSize:0\n" + - "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" + - "num: 1 lower_bound: 2 upper_bound: 5 repeats: 0", + "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" + + "num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 1", idxCols: 0, }, { // test index feedback by double read sql: "select * from t use index(idx) where t.b <= 5", hist: "index:1 ndv:1\n" + - "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0", + "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1", idxCols: 1, }, { // test index feedback by single read sql: "select b from t use index(idx) where t.b <= 5", hist: "index:1 ndv:1\n" + - "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0", + "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1", idxCols: 1, }, } @@ -1016,10 +1016,10 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) { h.UpdateStatsByLocalFeedback(s.do.InfoSchema()) tbl := h.GetTableStats(tblInfo) - c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ - "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+ - "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+ - "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0") + c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+ + "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+ + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+ + "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0") sc := &stmtctx.StatementContext{TimeZone: time.Local} low, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5)) c.Assert(err, IsNil) @@ -1027,8 +1027,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) { c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint64(2)) c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+ - "num: 2 lower_bound: -inf upper_bound: 5 repeats: 0\n"+ - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1") + "num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 1\n"+ + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1") // Test that it won't cause panic after update. testKit.MustQuery("select * from t use index(idx) where b > 0") @@ -1072,10 +1072,10 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) { pid := tblInfo.Partition.Definitions[0].ID tbl := h.GetPartitionStats(tblInfo, pid) - c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ - "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+ - "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+ - "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0") + c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+ + "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+ + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+ + "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0") } type logHook struct { @@ -1148,21 +1148,21 @@ func (s *testStatsSuite) TestLogDetailedInfo(c *C) { }{ { sql: "select * from t where t.a <= 15", - result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" + - "[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}", + result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" + + "[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}", }, { sql: "select * from t use index(idx) where t.b <= 15", - result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" + - "[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1}", + result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" + + "[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 4}", }, { sql: "select b from t use index(idx_ba) where b = 1 and a <= 5", - result: "[stats-feedback] test.t, index=idx_ba, actual=1, equality=1, expected equality=1, range=range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}", + result: "[stats-feedback] test.t, index=idx_ba, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}", }, { sql: "select b from t use index(idx_bc) where b = 1 and c <= 5", - result: "[stats-feedback] test.t, index=idx_bc, actual=1, equality=1, expected equality=1, range=[-inf,6], pseudo count=7", + result: "[stats-feedback] test.t, index=idx_bc, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}", }, { sql: "select b from t use index(idx_ba) where b = 1", @@ -1585,25 +1585,25 @@ func (s *testStatsSuite) TestFeedbackRanges(c *C) { { sql: "select * from t where a <= 50 or (a > 130 and a < 140)", hist: "column:1 ndv:30 totColSize:0\n" + - "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0", + "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", colID: 1, }, { sql: "select * from t where a >= 10", hist: "column:1 ndv:30 totColSize:0\n" + - "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 127 repeats: 0", + "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + + "num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 14", colID: 1, }, { sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))", hist: "column:2 ndv:20 totColSize:30\n" + - "num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" + - "num: 8 lower_bound: 7 upper_bound: 14 repeats: 0\n" + - "num: 7 lower_bound: 14 upper_bound: 51 repeats: 0", + "num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 8\n" + + "num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 8\n" + + "num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 7", colID: 2, }, } @@ -1665,33 +1665,33 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) { { sql: "select * from t where a <= 50", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", tblName: "t", }, { sql: "select count(*) from t", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 255 repeats: 0", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 255 repeats: 0 ndv: 0", tblName: "t", }, { sql: "select * from t1 where a <= 50", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", tblName: "t1", }, { sql: "select count(*) from t1", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" + - "num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0 ndv: 0", tblName: "t1", }, } diff --git a/statistics/histogram.go b/statistics/histogram.go index 70bf8960597ec..1d15e15f95d6a 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -79,6 +79,7 @@ type Histogram struct { type Bucket struct { Count int64 Repeat int64 + NDV int64 } type scalar struct { @@ -201,7 +202,14 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 { // AppendBucket appends a bucket into `hg`. func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) { - hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat}) + hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 1}) + hg.Bounds.AppendDatum(0, lower) + hg.Bounds.AppendDatum(0, upper) +} + +// AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`. +func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, count, repeat, ndv int64) { + hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: ndv}) hg.Bounds.AppendDatum(0, lower) hg.Bounds.AppendDatum(0, upper) } @@ -210,7 +218,9 @@ func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) { len := hg.Len() hg.Bounds.TruncateTo(2*len - 1) hg.Bounds.AppendDatum(0, upper) - hg.Buckets[len-1] = Bucket{Count: count, Repeat: repeat} + hg.Buckets[len-1].Count = count + hg.Buckets[len-1].Repeat = repeat + hg.Buckets[len-1].NDV++ } // DecodeTo decodes the histogram bucket values into `Tp`. @@ -261,9 +271,13 @@ func HistogramEqual(a, b *Histogram, ignoreID bool) bool { } // constants for stats version. These const can be used for solving compatibility issue. +// If the version number is 0, it means the most original statistics. const ( - CurStatsVersion = Version1 - Version1 = 1 + CurStatsVersion = Version2 + // Version1 added CMSketch. + Version1 = 1 + // Version2 added bucket NDV for index's full analyze. + Version2 = 2 ) // AnalyzeFlag is set when the statistics comes from analyze and has not been modified by feedback. @@ -302,7 +316,7 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string { terror.Log(errors.Trace(err)) lowerVal, err := ValueToString(hg.GetLower(bktID), idxCols) terror.Log(errors.Trace(err)) - return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat) + return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d ndv: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat, hg.Buckets[bktID].NDV) } // ToString gets the string representation for the histogram. @@ -327,6 +341,9 @@ func (hg *Histogram) equalRowCount(value types.Datum) float64 { if match { return float64(hg.Buckets[index/2].Repeat) } + if hg.Buckets[index/2].NDV > 0 { + return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) + } return hg.notNullCount() / float64(hg.NDV) } if match { @@ -334,6 +351,9 @@ func (hg *Histogram) equalRowCount(value types.Datum) float64 { if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { return float64(hg.Buckets[index/2].Repeat) } + if hg.Buckets[index/2].NDV > 0 { + return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) + } return hg.notNullCount() / float64(hg.NDV) } return 0 @@ -410,7 +430,9 @@ func (hg *Histogram) mergeBuckets(bucketIdx int) { curBuck := 0 c := chunk.NewChunkWithCapacity([]*types.FieldType{hg.Tp}, bucketIdx) for i := 0; i+1 <= bucketIdx; i += 2 { - hg.Buckets[curBuck] = hg.Buckets[i+1] + hg.Buckets[curBuck].NDV = hg.Buckets[i+1].NDV + hg.Buckets[i].NDV + hg.Buckets[curBuck].Count = hg.Buckets[i+1].Count + hg.Buckets[curBuck].Repeat = hg.Buckets[i+1].Repeat c.AppendDatum(0, hg.GetLower(i)) c.AppendDatum(0, hg.GetUpper(i+1)) curBuck++ @@ -578,6 +600,7 @@ func HistogramToProto(hg *Histogram) *tipb.Histogram { LowerBound: hg.GetLower(i).GetBytes(), UpperBound: hg.GetUpper(i).GetBytes(), Repeats: hg.Buckets[i].Repeat, + Ndv: &hg.Buckets[i].NDV, } protoHg.Buckets = append(protoHg.Buckets, bkt) } @@ -592,7 +615,11 @@ func HistogramFromProto(protoHg *tipb.Histogram) *Histogram { hg := NewHistogram(0, protoHg.Ndv, 0, 0, tp, len(protoHg.Buckets), 0) for _, bucket := range protoHg.Buckets { lower, upper := types.NewBytesDatum(bucket.LowerBound), types.NewBytesDatum(bucket.UpperBound) - hg.AppendBucket(&lower, &upper, bucket.Count, bucket.Repeats) + if bucket.Ndv != nil { + hg.AppendBucketWithNDV(&lower, &upper, bucket.Count, bucket.Repeats, *bucket.Ndv) + } else { + hg.AppendBucket(&lower, &upper, bucket.Count, bucket.Repeats) + } } return hg } @@ -626,6 +653,7 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, offset := int64(0) if cmp == 0 { lh.NDV-- + lh.Buckets[len(lh.Buckets)-1].NDV-- lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat) offset = rh.Buckets[0].Count rh.popFirstBucket() @@ -916,20 +944,20 @@ func (idx *Index) MemoryUsage() (sum int64) { var nullKeyBytes, _ = codec.EncodeKey(nil, nil, types.NewDatum(nil)) -func (idx *Index) equalRowCount(sc *stmtctx.StatementContext, b []byte, modifyCount int64) (float64, error) { +func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 { if len(idx.Info.Columns) == 1 { if bytes.Equal(b, nullKeyBytes) { - return float64(idx.NullCount), nil + return float64(idx.NullCount) } } val := types.NewBytesDatum(b) if idx.NDV > 0 && idx.outOfRange(val) { - return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount(), nil + return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() } - if idx.CMSketch != nil { - return float64(idx.CMSketch.QueryBytes(b)), nil + if idx.CMSketch != nil && (len(idx.Histogram.Buckets) == 0 || idx.Histogram.Buckets[0].NDV == 0) { + return float64(idx.CMSketch.QueryBytes(b)) } - return idx.Histogram.equalRowCount(val), nil + return idx.Histogram.equalRowCount(val) } // GetRowCount returns the row count of the given ranges. @@ -957,10 +985,7 @@ func (idx *Index) GetRowCount(sc *stmtctx.StatementContext, indexRanges []*range totalCount += 1 continue } - count, err := idx.equalRowCount(sc, lb, modifyCount) - if err != nil { - return 0, err - } + count := idx.equalRowCount(lb, modifyCount) totalCount += count continue } diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 4fbf65ffffc50..aec115c5599a3 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -675,5 +675,5 @@ func (s *testStatisticsSuite) TestIndexRanges(c *C) { ran[0].HighVal[0] = types.NewIntDatum(1000) count, err = tbl.GetRowCountByIndexRanges(sc, 0, ran) c.Assert(err, IsNil) - c.Assert(int(count), Equals, 0) + c.Assert(int(count), Equals, 3) } diff --git a/statistics/table.go b/statistics/table.go index e080e755b1061..ddfc90fe64ca5 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -489,6 +489,9 @@ func (coll *HistColl) getEqualCondSelectivity(idx *Index, bytes []byte, usedCols } return outOfRangeEQSelectivity(ndv, coll.ModifyCount, int64(idx.TotalRowCount())) } + if coverAll && len(idx.Histogram.Buckets) > 0 && idx.Histogram.Buckets[0].NDV > 0 { + return idx.Histogram.equalRowCount(val) + } return float64(idx.CMSketch.QueryBytes(bytes)) / float64(idx.TotalRowCount()) } diff --git a/statistics/testdata/stats_suite_out.json b/statistics/testdata/stats_suite_out.json index cd4fa051551d8..b92788b9c0b69 100644 --- a/statistics/testdata/stats_suite_out.json +++ b/statistics/testdata/stats_suite_out.json @@ -60,8 +60,8 @@ "Name": "TestDiscreteDistribution", "Cases": [ [ - "IndexReader_6 0.00 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 0.00 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false" + "IndexReader_6 1.02 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 1.02 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false" ] ] }, @@ -92,8 +92,8 @@ "Name": "TestCollationColumnEstimate", "Cases": [ [ - "test t a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A", - "test t a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B" + "test t a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 1", + "test t a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 1" ], [ "TableReader_7 2.00 root data:Selection_6", diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go index 4342ab0d7388b..4b5aa22f9dccc 100644 --- a/store/mockstore/unistore/cophandler/closure_exec.go +++ b/store/mockstore/unistore/cophandler/closure_exec.go @@ -141,6 +141,10 @@ func newClosureExecutor(dagCtx *dagContext, dagReq *tipb.DAGRequest) (*closureEx e.unique = idxScan.GetUnique() e.scanCtx.desc = idxScan.Desc e.initIdxScanCtx(idxScan) + if dagReq.GetCollectRangeCounts() { + e.idxScanCtx.collectNdv = true + e.idxScanCtx.previousVals = make([][]byte, e.idxScanCtx.columnLen) + } default: panic(fmt.Sprintf("unknown first executor type %s", executors[0].Tp)) } @@ -150,6 +154,7 @@ func newClosureExecutor(dagCtx *dagContext, dagReq *tipb.DAGRequest) (*closureEx } if dagReq.GetCollectRangeCounts() { e.counts = make([]int64, len(ranges)) + e.ndvs = make([]int64, len(ranges)) } e.kvRanges = ranges e.scanCtx.chk = chunk.NewChunkWithCapacity(e.fieldTps, 32) @@ -315,6 +320,8 @@ type closureExecutor struct { processor closureProcessor counts []int64 + ndvs []int64 + curNdv int64 } type closureProcessor interface { @@ -339,6 +346,9 @@ type idxScanCtx struct { columnLen int colInfos []rowcodec.ColInfo primaryColumnIds []int64 + + collectNdv bool + previousVals [][]byte } type aggCtx struct { @@ -362,6 +372,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) { } dbReader := e.dbReader for i, ran := range e.kvRanges { + e.curNdv = 0 if e.isPointGetRange(ran) { val, err := dbReader.Get(ran.StartKey, e.startTS) if err != nil { @@ -372,6 +383,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) { } if e.counts != nil { e.counts[i]++ + e.ndvs[i] = e.curNdv } err = e.processor.Process(ran.StartKey, val) if err != nil { @@ -387,6 +399,7 @@ func (e *closureExecutor) execute() ([]tipb.Chunk, error) { delta := int64(e.rowCount - oldCnt) if e.counts != nil { e.counts[i] += delta + e.ndvs[i] = e.curNdv } if err != nil { return nil, errors.Trace(err) @@ -591,6 +604,7 @@ func (e *closureExecutor) tableScanProcessCore(key, value []byte) error { if err != nil { return errors.Trace(err) } + e.curNdv++ return nil } @@ -619,6 +633,15 @@ func (e *indexScanProcessor) Finish() error { return e.scanFinish() } +func (isc *idxScanCtx) checkVal(curVals [][]byte) bool { + for i := 0; i < isc.columnLen; i++ { + if bytes.Compare(isc.previousVals[i], curVals[i]) != 0 { + return false + } + } + return true +} + func (e *closureExecutor) indexScanProcessCore(key, value []byte) error { handleStatus := mapPkStatusToHandleStatus(e.idxScanCtx.pkStatus) restoredCols := make([]rowcodec.ColInfo, 0, len(e.idxScanCtx.colInfos)) @@ -631,6 +654,14 @@ func (e *closureExecutor) indexScanProcessCore(key, value []byte) error { if err != nil { return err } + if e.idxScanCtx.collectNdv { + if len(e.idxScanCtx.previousVals[0]) == 0 || !e.idxScanCtx.checkVal(values) { + e.curNdv++ + for i := 0; i < e.idxScanCtx.columnLen; i++ { + e.idxScanCtx.previousVals[i] = append(e.idxScanCtx.previousVals[i][:0], values[i]...) + } + } + } chk := e.scanCtx.chk decoder := codec.NewDecoder(chk, e.sc.TimeZone) for i, colVal := range values { diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go index 41453f85a34b6..2dc6528374359 100644 --- a/store/mockstore/unistore/cophandler/cop_handler.go +++ b/store/mockstore/unistore/cophandler/cop_handler.go @@ -74,10 +74,10 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt } closureExec, err := buildClosureExecutor(dagCtx, dagReq) if err != nil { - return buildResp(nil, nil, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) + return buildResp(nil, nil, nil, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) } chunks, err := closureExec.execute() - return buildResp(chunks, closureExec.counts, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) + return buildResp(chunks, closureExec.counts, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) } func buildDAG(reader *dbreader.DBReader, lockStore *lockstore.MemStore, req *coprocessor.Request) (*dagContext, *tipb.DAGRequest, error) { @@ -268,12 +268,13 @@ func (e *ErrLocked) Error() string { return fmt.Sprintf("key is locked, key: %q, Type: %v, primary: %q, startTS: %v", e.Key, e.LockType, e.Primary, e.StartTS) } -func buildResp(chunks []tipb.Chunk, counts []int64, dagReq *tipb.DAGRequest, err error, warnings []stmtctx.SQLWarn, dur time.Duration) *coprocessor.Response { +func buildResp(chunks []tipb.Chunk, counts, ndvs []int64, dagReq *tipb.DAGRequest, err error, warnings []stmtctx.SQLWarn, dur time.Duration) *coprocessor.Response { resp := &coprocessor.Response{} selResp := &tipb.SelectResponse{ Error: toPBError(err), Chunks: chunks, OutputCounts: counts, + Ndvs: ndvs, } if dagReq.CollectExecutionSummaries != nil && *dagReq.CollectExecutionSummaries { execSummary := make([]*tipb.ExecutorExecutionSummary, len(dagReq.Executors)) From b4bdd723fa460daad4bb283978f3e530d1b5f939 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 26 Oct 2020 20:08:27 +0800 Subject: [PATCH 2/9] address comments --- executor/analyze.go | 8 +++++++- statistics/handle/dump.go | 4 ++-- statistics/handle/dump_test.go | 2 +- statistics/handle/handle.go | 6 +++--- statistics/handle/update.go | 8 +++++--- statistics/histogram.go | 3 +++ 6 files changed, 21 insertions(+), 10 deletions(-) diff --git a/executor/analyze.go b/executor/analyze.go index a23f7aa26a6bd..0f6d7cfc566e9 100755 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -112,7 +112,7 @@ func (e *AnalyzeExec) Next(ctx context.Context, req *chunk.Chunk) error { continue } for i, hg := range result.Hist { - err1 := statsHandle.SaveStatsToStorage(result.PhysicalTableID, result.Count, result.IsIndex, hg, result.Cms[i], 1) + err1 := statsHandle.SaveStatsToStorage(result.PhysicalTableID, result.Count, result.IsIndex, hg, result.Cms[i], statistics.CurStatsVersion, 1) if err1 != nil { err = err1 logutil.Logger(ctx).Error("save stats to storage failed", zap.Error(err)) @@ -239,6 +239,7 @@ func analyzeIndexPushdown(idxExec *AnalyzeIndexExec) analyzeResult { Cms: []*statistics.CMSketch{cms}, IsIndex: 1, job: idxExec.job, + StatsVer: statistics.CurStatsVersion, } result.Count = hist.NullCount if hist.Len() > 0 { @@ -401,6 +402,7 @@ func analyzeColumnsPushdown(colExec *AnalyzeColumnsExec) analyzeResult { Cms: cms, ExtStats: extStats, job: colExec.job, + StatsVer: statistics.Version0, } hist := hists[0] result.Count = hist.NullCount @@ -598,6 +600,7 @@ func analyzeFastExec(exec *AnalyzeFastExec) []analyzeResult { IsIndex: 1, Count: hists[i].NullCount, job: exec.job, + StatsVer: statistics.Version1, } if hists[i].Len() > 0 { idxResult.Count += hists[i].Buckets[hists[i].Len()-1].Count @@ -1220,6 +1223,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult Cms: []*statistics.CMSketch{cms}, IsIndex: 1, job: idxExec.job, + StatsVer: statistics.Version2, } result.Count = hist.NullCount if hist.Len() > 0 { @@ -1257,6 +1261,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult { Hist: []*statistics.Histogram{hist}, Cms: []*statistics.CMSketch{nil}, job: colExec.job, + StatsVer: statistics.Version0, } if hist.Len() > 0 { result.Count += hist.Buckets[hist.Len()-1].Count @@ -1275,4 +1280,5 @@ type analyzeResult struct { IsIndex int Err error job *statistics.AnalyzeJob + StatsVer int64 } diff --git a/statistics/handle/dump.go b/statistics/handle/dump.go index 16295569d76c0..8d7d3f18c2f71 100644 --- a/statistics/handle/dump.go +++ b/statistics/handle/dump.go @@ -156,13 +156,13 @@ func (h *Handle) loadStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, } for _, col := range tbl.Columns { - err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1) + err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 0, &col.Histogram, col.CMSketch, 1, 0) if err != nil { return errors.Trace(err) } } for _, idx := range tbl.Indices { - err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, 1) + err = h.SaveStatsToStorage(tbl.PhysicalID, tbl.Count, 1, &idx.Histogram, idx.CMSketch, idx.StatsVer, 1) if err != nil { return errors.Trace(err) } diff --git a/statistics/handle/dump_test.go b/statistics/handle/dump_test.go index 2d4dcc52ff637..0f9fef1b7bdee 100644 --- a/statistics/handle/dump_test.go +++ b/statistics/handle/dump_test.go @@ -150,7 +150,7 @@ func (s *testStatsSuite) TestDumpCMSketchWithTopN(c *C) { cms, _, _ := statistics.NewCMSketchWithTopN(5, 2048, fakeData, 20, 100) stat := h.GetTableStats(tableInfo) - err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, 1) + err = h.SaveStatsToStorage(tableInfo.ID, 1, 0, &stat.Columns[tableInfo.Columns[0].ID].Histogram, cms, statistics.CurStatsVersion, 1) c.Assert(err, IsNil) c.Assert(h.Update(is), IsNil) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index fd43de6188ef4..0f010227e55f9 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -635,7 +635,7 @@ func (h *Handle) extendedStatsFromStorage(reader *statsReader, table *statistics } // SaveStatsToStorage saves the stats to storage. -func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, isAnalyzed int64) (err error) { +func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg *statistics.Histogram, cms *statistics.CMSketch, statsVer int64, isAnalyzed int64) (err error) { h.mu.Lock() defer h.mu.Unlock() ctx := context.TODO() @@ -673,8 +673,8 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg if isAnalyzed == 1 { flag = statistics.AnalyzeFlag } - sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)", - tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statistics.CurStatsVersion, flag, hg.Correlation)) + sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, statsVer, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)", + tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statsVer, flag, hg.Correlation)) sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID)) sc := h.mu.ctx.GetSessionVars().StmtCtx var lastAnalyzePos []byte diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 5d0de1ea7079e..a2da21eb60b5b 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -564,9 +564,11 @@ func (h *Handle) handleSingleHistogramUpdate(is infoschema.InfoSchema, rows []ch } var cms *statistics.CMSketch var hist *statistics.Histogram + var statsVer int64 = 0 if isIndex == 1 { idx, ok := tbl.Indices[histID] if ok && idx.Histogram.Len() > 0 { + statsVer = idx.StatsVer idxHist := idx.Histogram hist = &idxHist cms = idx.CMSketch.Copy() @@ -589,7 +591,7 @@ func (h *Handle) handleSingleHistogramUpdate(is infoschema.InfoSchema, rows []ch logutil.BgLogger().Debug("decode feedback failed", zap.Error(err)) } } - err = h.dumpStatsUpdateToKV(physicalTableID, isIndex, q, hist, cms) + err = h.dumpStatsUpdateToKV(physicalTableID, isIndex, q, hist, cms, statsVer) return errors.Trace(err) } @@ -608,9 +610,9 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error { return nil } -func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch) error { +func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, statsVer int64) error { hist = statistics.UpdateHistogram(hist, q) - err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, 0) + err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, statsVer, 0) metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc() return errors.Trace(err) } diff --git a/statistics/histogram.go b/statistics/histogram.go index 1d15e15f95d6a..c54231d1fff30 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -274,6 +274,9 @@ func HistogramEqual(a, b *Histogram, ignoreID bool) bool { // If the version number is 0, it means the most original statistics. const ( CurStatsVersion = Version2 + + // Version0 is the most early statistics only histogram. + Version0 = 0 // Version1 added CMSketch. Version1 = 1 // Version2 added bucket NDV for index's full analyze. From 7e6638e0584e950141abd68c91c652a59e767a43 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 15 Dec 2020 03:19:40 +0800 Subject: [PATCH 3/9] add version control and make feedback collect work again --- distsql/select_result.go | 2 +- executor/analyze.go | 12 +++-- planner/core/planbuilder.go | 2 +- statistics/builder.go | 20 +++++-- statistics/feedback.go | 2 +- statistics/feedback_test.go | 2 +- statistics/handle/update.go | 6 +-- statistics/histogram.go | 52 +++++++++++++++++-- statistics/sample_test.go | 2 +- statistics/statistics_test.go | 6 +-- statistics/table.go | 2 +- store/mockstore/mocktikv/analyze.go | 4 +- .../mockstore/unistore/cophandler/analyze.go | 14 +++-- .../unistore/cophandler/closure_exec.go | 2 +- 14 files changed, 96 insertions(+), 32 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index b2e3ae327dd24..7208928732f60 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -147,7 +147,7 @@ func (r *selectResult) fetchResp(ctx context.Context) error { sc.AppendWarning(dbterror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg)) } if r.feedback != nil { - logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs)) + // logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs)) r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs) } r.partialCount++ diff --git a/executor/analyze.go b/executor/analyze.go index ece7a77ec26b6..b62135178a05a 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -333,6 +333,10 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee cms = statistics.NewCMSketch(int32(e.opts[ast.AnalyzeOptCMSketchDepth]), int32(e.opts[ast.AnalyzeOptCMSketchWidth])) topn = statistics.NewTopN(int(e.opts[ast.AnalyzeOptNumTopN])) } + statsVer := statistics.Version1 + if e.analyzePB.IdxReq.Version != nil { + statsVer = int(*e.analyzePB.IdxReq.Version) + } for { data, err := result.NextRaw(context.TODO()) if err != nil { @@ -348,7 +352,7 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee } respHist := statistics.HistogramFromProto(resp.Hist) e.job.Update(int64(respHist.TotalRowCount())) - hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets])) + hist, err = statistics.MergeHistograms(e.ctx.GetSessionVars().StmtCtx, hist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]), statsVer) if err != nil { return nil, nil, nil, err } @@ -535,7 +539,7 @@ func (e *AnalyzeColumnsExec) buildStats(ranges []*ranger.Range, needExtStats boo if hasPkHist(e.handleCols) { respHist := statistics.HistogramFromProto(resp.PkHist) rowCount = int64(respHist.TotalRowCount()) - pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets])) + pkHist, err = statistics.MergeHistograms(sc, pkHist, respHist, int(e.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1) if err != nil { return nil, nil, nil, nil, err } @@ -1212,7 +1216,7 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult if err != nil { return analyzeResult{Err: err, job: idxExec.job} } - hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets])) + hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1) if err != nil { return analyzeResult{Err: err, job: idxExec.job} } @@ -1263,7 +1267,7 @@ func analyzePKIncremental(colExec *analyzePKIncrementalExec) analyzeResult { return analyzeResult{Err: err, job: colExec.job} } hist := hists[0] - hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets])) + hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.opts[ast.AnalyzeOptNumBuckets]), statistics.Version1) if err != nil { return analyzeResult{Err: err, job: colExec.job} } diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 4065fec337b9b..09bd268afafc3 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -3441,7 +3441,7 @@ func buildShowSchema(s *ast.ShowStmt, isView bool, isSequence bool) (schema *exp names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Bucket_id", "Count", "Repeats", "Lower_Bound", "Upper_Bound", "Ndv"} ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeLonglong, - mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar} + mysql.TypeLonglong, mysql.TypeLonglong, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeLonglong} case ast.ShowStatsTopN: names = []string{"Db_name", "Table_name", "Partition_name", "Column_name", "Is_index", "Value", "Count"} ftypes = []byte{mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeVarchar, mysql.TypeTiny, mysql.TypeVarchar, mysql.TypeLonglong} diff --git a/statistics/builder.go b/statistics/builder.go index 8168c64acfb24..ce57023d80198 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -29,15 +29,17 @@ type SortedBuilder struct { bucketIdx int64 Count int64 hist *Histogram + statsVer int } // NewSortedBuilder creates a new SortedBuilder. -func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType) *SortedBuilder { +func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *types.FieldType, statsVer int) *SortedBuilder { return &SortedBuilder{ sc: sc, numBuckets: numBuckets, valuesPerBucket: 1, hist: NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0), + statsVer: statsVer, } } @@ -49,8 +51,16 @@ func (b *SortedBuilder) Hist() *Histogram { // Iterate updates the histogram incrementally. func (b *SortedBuilder) Iterate(data types.Datum) error { b.Count++ + appendBucket := b.hist.AppendBucket + updateLastBucket := b.hist.updateLastBucket + if b.statsVer == Version2 { + updateLastBucket = b.hist.updateLastBucketV2 + appendBucket = func(lower, upper *types.Datum, count, repeat int64) { + b.hist.AppendBucketWithNDV(lower, upper, count, repeat, 1) + } + } if b.Count == 1 { - b.hist.AppendBucketWithNDV(&data, &data, 1, 1, 1) + appendBucket(&data, &data, 1, 1) b.hist.NDV = 1 return nil } @@ -66,7 +76,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { b.hist.Buckets[b.bucketIdx].Repeat++ } else if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket { // The bucket still have room to store a new item, update the bucket. - b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) + updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) b.hist.NDV++ } else { // All buckets are full, we should merge buckets. @@ -82,11 +92,11 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { } // We may merge buckets, so we should check it again. if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket { - b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) + updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) } else { b.lastNumber = b.hist.Buckets[b.bucketIdx].Count b.bucketIdx++ - b.hist.AppendBucketWithNDV(&data, &data, b.lastNumber+1, 1, 1) + appendBucket(&data, &data, b.lastNumber+1, 1) } b.hist.NDV++ } diff --git a/statistics/feedback.go b/statistics/feedback.go index bc7ee2ae1ac1f..1f042b85d41cc 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -743,7 +743,7 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6 } // UpdateHistogram updates the histogram according buckets. -func UpdateHistogram(h *Histogram, feedback *QueryFeedback) *Histogram { +func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram { buckets, isNewBuckets, totalCount := splitBuckets(h, feedback) ndvs := make([]int64, len(buckets)) for i := range buckets { diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go index 345f1435fe46d..12b87ffa57c3f 100644 --- a/statistics/feedback_test.go +++ b/statistics/feedback_test.go @@ -73,7 +73,7 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) { originBucketCount := defaultBucketCount defaultBucketCount = 7 defer func() { defaultBucketCount = originBucketCount }() - c.Assert(UpdateHistogram(q.Hist, q).ToString(0), Equals, + c.Assert(UpdateHistogram(q.Hist, q, Version2).ToString(0), Equals, "column:0 ndv:10053 totColSize:0\n"+ "num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0 ndv: 2\n"+ "num: 7 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 2\n"+ diff --git a/statistics/handle/update.go b/statistics/handle/update.go index c0dc411687857..d06f0b8268d28 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -573,7 +573,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { ranFB = statistics.CleanRangeFeedbackByTopN(ranFB, idx.TopN) } newIdx.CMSketch, newIdx.TopN = statistics.UpdateCMSketchAndTopN(idx.CMSketch, idx.TopN, eqFB) - newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB}) + newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB}, int(idx.StatsVer)) newIdx.Histogram.PreCalculateScalar() newIdx.Flag = statistics.ResetAnalyzeFlag(newIdx.Flag) newTblStats.Indices[fb.Hist.ID] = &newIdx @@ -587,7 +587,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { _, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback) newFB := &statistics.QueryFeedback{Feedback: ranFB} newFB = newFB.DecodeIntValues() - newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB) + newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB, statistics.Version1) newCol.Flag = statistics.ResetAnalyzeFlag(newCol.Flag) newTblStats.Columns[fb.Hist.ID] = &newCol } @@ -763,7 +763,7 @@ func (h *Handle) deleteOutdatedFeedback(tableID, histID, isIndex int64) error { } func (h *Handle) dumpStatsUpdateToKV(tableID, isIndex int64, q *statistics.QueryFeedback, hist *statistics.Histogram, cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int64) error { - hist = statistics.UpdateHistogram(hist, q) + hist = statistics.UpdateHistogram(hist, q, int(statsVersion)) err := h.SaveStatsToStorage(tableID, -1, int(isIndex), hist, cms, topN, int(statsVersion), 0) metrics.UpdateStatsCounter.WithLabelValues(metrics.RetLabel(err)).Inc() return errors.Trace(err) diff --git a/statistics/histogram.go b/statistics/histogram.go index a7a84bb5b2ca6..6411829b29f8b 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -214,7 +214,7 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 { // AppendBucket appends a bucket into `hg`. func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) { - hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 1}) + hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 0}) hg.Bounds.AppendDatum(0, lower) hg.Bounds.AppendDatum(0, upper) } @@ -232,7 +232,15 @@ func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) { hg.Bounds.AppendDatum(0, upper) hg.Buckets[len-1].Count = count hg.Buckets[len-1].Repeat = repeat - hg.Buckets[len-1].NDV++ +} + +func (hg *Histogram) updateLastBucketV2(upper *types.Datum, count, repeat int64) { + hg.updateLastBucket(upper, count, repeat) + l := hg.Len() + // The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here. + if hg.Buckets[l-1].NDV > 0 { + hg.Buckets[l-1].NDV++ + } } // DecodeTo decodes the histogram bucket values into `Tp`. @@ -383,6 +391,26 @@ func (hg *Histogram) ToString(idxCols int) string { // equalRowCount estimates the row count where the column equals to value. func (hg *Histogram) equalRowCount(value types.Datum) float64 { + index, match := hg.Bounds.LowerBound(0, &value) + // Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound. + if index%2 == 1 { + if match { + return float64(hg.Buckets[index/2].Repeat) + } + return hg.notNullCount() / float64(hg.NDV) + } + if match { + cmp := chunk.GetCompareFunc(hg.Tp) + if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { + return float64(hg.Buckets[index/2].Repeat) + } + return hg.notNullCount() / float64(hg.NDV) + } + return 0 +} + +// equalRowCountV2 estimates the row count where the column equals to value. +func (hg *Histogram) equalRowCountV2(value types.Datum) float64 { index, match := hg.Bounds.LowerBound(0, &value) // Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound. if index%2 == 1 { @@ -703,7 +731,7 @@ func (hg *Histogram) IsIndexHist() bool { } // MergeHistograms merges two histograms. -func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int) (*Histogram, error) { +func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, bucketSize int, statsVer int) (*Histogram, error) { if lh.Len() == 0 { return rh, nil } @@ -719,7 +747,9 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, offset := int64(0) if cmp == 0 { lh.NDV-- - lh.Buckets[len(lh.Buckets)-1].NDV-- + if rh.Buckets[0].NDV > 0 { + lh.Buckets[lLen-1].NDV += rh.Buckets[0].NDV - 1 + } lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat) offset = rh.Buckets[0].Count rh.popFirstBucket() @@ -746,6 +776,10 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, rAvg *= 2 } for i := 0; i < rh.Len(); i++ { + if statsVer == Version2 { + lh.AppendBucketWithNDV(rh.GetLower(i), rh.GetUpper(i), rh.Buckets[i].Count+lCount-offset, rh.Buckets[i].Repeat, rh.Buckets[i].NDV) + continue + } lh.AppendBucket(rh.GetLower(i), rh.GetUpper(i), rh.Buckets[i].Count+lCount-offset, rh.Buckets[i].Repeat) } for lh.Len() > bucketSize { @@ -1036,9 +1070,17 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 { if idx.NDV > 0 && idx.outOfRange(val) { return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() } - if idx.CMSketch != nil && (len(idx.Histogram.Buckets) == 0 || idx.Histogram.Buckets[0].NDV == 0) { + if idx.CMSketch != nil && idx.StatsVer == Version1 { return float64(idx.QueryBytes(b)) } + // If it's version2, query the top-n first. + if idx.StatsVer == Version2 { + count, found := idx.TopN.QueryTopN(b) + if found { + return float64(count) + } + return idx.Histogram.equalRowCountV2(val) + } return idx.Histogram.equalRowCount(val) } diff --git a/statistics/sample_test.go b/statistics/sample_test.go index 34d3f31117db9..1a9647505b547 100644 --- a/statistics/sample_test.go +++ b/statistics/sample_test.go @@ -60,7 +60,7 @@ func (s *testSampleSuite) TestCollectColumnStats(c *C) { Sc: sc, RecordSet: s.rs, ColLen: 1, - PkBuilder: NewSortedBuilder(sc, 256, 1, types.NewFieldType(mysql.TypeLonglong)), + PkBuilder: NewSortedBuilder(sc, 256, 1, types.NewFieldType(mysql.TypeLonglong), Version2), MaxSampleSize: 10000, MaxBucketSize: 256, MaxFMSketchSize: 1000, diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 60f4e32a9eb03..460d7d1a2c44f 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -180,7 +180,7 @@ func encodeKey(key types.Datum) types.Datum { } func buildPK(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.RecordSet) (int64, *Histogram, error) { - b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeLonglong)) + b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeLonglong), Version1) ctx := context.Background() for { req := records.NewChunk() @@ -204,7 +204,7 @@ func buildPK(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.Reco } func buildIndex(sctx sessionctx.Context, numBuckets, id int64, records sqlexec.RecordSet) (int64, *Histogram, *CMSketch, error) { - b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeBlob)) + b := NewSortedBuilder(sctx.GetSessionVars().StmtCtx, numBuckets, id, types.NewFieldType(mysql.TypeBlob), Version1) cms := NewCMSketch(8, 2048) ctx := context.Background() req := records.NewChunk() @@ -403,7 +403,7 @@ func (s *testStatisticsSuite) TestMergeHistogram(c *C) { for _, t := range tests { lh := mockHistogram(t.leftLower, t.leftNum) rh := mockHistogram(t.rightLower, t.rightNum) - h, err := MergeHistograms(sc, lh, rh, bucketCount) + h, err := MergeHistograms(sc, lh, rh, bucketCount, Version1) c.Assert(err, IsNil) c.Assert(h.NDV, Equals, t.ndv) c.Assert(h.Len(), Equals, t.bucketNum) diff --git a/statistics/table.go b/statistics/table.go index 79cba7ae26456..afaa2afffe083 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -428,7 +428,7 @@ func (coll *HistColl) GetRowCountByIndexRanges(sc *stmtctx.StatementContext, idx } var result float64 var err error - if idx.CMSketch != nil && idx.StatsVer != Version0 { + if idx.CMSketch != nil && idx.StatsVer == Version1 { result, err = coll.getIndexRowCount(sc, idxID, indexRanges) } else { result, err = idx.GetRowCount(sc, indexRanges, coll.ModifyCount) diff --git a/store/mockstore/mocktikv/analyze.go b/store/mockstore/mocktikv/analyze.go index fa0d9384694c8..a575f5536015d 100644 --- a/store/mockstore/mocktikv/analyze.go +++ b/store/mockstore/mocktikv/analyze.go @@ -81,7 +81,7 @@ func (h *rpcHandler) handleAnalyzeIndexReq(req *coprocessor.Request, analyzeReq execDetail: new(execDetail), hdStatus: tablecodec.HandleNotNeeded, } - statsBuilder := statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)) + statsBuilder := statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), statistics.Version1) var cms *statistics.CMSketch if analyzeReq.IdxReq.CmsketchDepth != nil && analyzeReq.IdxReq.CmsketchWidth != nil { cms = statistics.NewCMSketch(*analyzeReq.IdxReq.CmsketchDepth, *analyzeReq.IdxReq.CmsketchWidth) @@ -212,7 +212,7 @@ func (h *rpcHandler) handleAnalyzeColumnsReq(req *coprocessor.Request, analyzeRe ColsFieldType: fts, } if pkID != -1 { - builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob)) + builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob), statistics.Version1) } if colReq.CmsketchWidth != nil && colReq.CmsketchDepth != nil { builder.CMSketchWidth = *colReq.CmsketchWidth diff --git a/store/mockstore/unistore/cophandler/analyze.go b/store/mockstore/unistore/cophandler/analyze.go index 329335a70af5e..f2e980023163b 100644 --- a/store/mockstore/unistore/cophandler/analyze.go +++ b/store/mockstore/unistore/cophandler/analyze.go @@ -83,7 +83,7 @@ func handleAnalyzeIndexReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, anal } processor := &analyzeIndexProcessor{ colLen: int(analyzeReq.IdxReq.NumColumns), - statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)), + statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), int(statsVer)), statsVer: statsVer, } if analyzeReq.IdxReq.TopNSize != nil { @@ -133,9 +133,13 @@ func handleAnalyzeIndexReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, anal } func handleAnalyzeCommonHandleReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, analyzeReq *tipb.AnalyzeReq, startTS uint64) (*coprocessor.Response, error) { + statsVer := statistics.Version1 + if analyzeReq.IdxReq.Version != nil { + statsVer = int(*analyzeReq.IdxReq.Version) + } processor := &analyzeCommonHandleProcessor{ colLen: int(analyzeReq.IdxReq.NumColumns), - statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob)), + statsBuilder: statistics.NewSortedBuilder(flagsToStatementContext(analyzeReq.Flags), analyzeReq.IdxReq.BucketSize, 0, types.NewFieldType(mysql.TypeBlob), statsVer), } if analyzeReq.IdxReq.CmsketchDepth != nil && analyzeReq.IdxReq.CmsketchWidth != nil { processor.cms = statistics.NewCMSketch(*analyzeReq.IdxReq.CmsketchDepth, *analyzeReq.IdxReq.CmsketchWidth) @@ -308,8 +312,12 @@ func handleAnalyzeColumnsReq(dbReader *dbreader.DBReader, rans []kv.KeyRange, an Collators: collators, ColsFieldType: fts, } + statsVer := statistics.Version1 + if analyzeReq.ColReq.Version != nil { + statsVer = int(*analyzeReq.ColReq.Version) + } if pkID != -1 { - builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob)) + builder.PkBuilder = statistics.NewSortedBuilder(sc, builder.MaxBucketSize, pkID, types.NewFieldType(mysql.TypeBlob), statsVer) } if colReq.CmsketchWidth != nil && colReq.CmsketchDepth != nil { builder.CMSketchWidth = *colReq.CmsketchWidth diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go index 48c56389b59c6..3b0f70d3a6cae 100644 --- a/store/mockstore/unistore/cophandler/closure_exec.go +++ b/store/mockstore/unistore/cophandler/closure_exec.go @@ -844,7 +844,7 @@ type idxScanCtx struct { colInfos []rowcodec.ColInfo primaryColumnIds []int64 execDetail *execDetail - collectNDV bool + collectNDV bool prevVals [][]byte } From 05d588ba4a2dc4207165156750e3862f1640c618 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 15 Dec 2020 13:58:08 +0800 Subject: [PATCH 4/9] fix ndv when extract topn out of hist --- statistics/histogram.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/statistics/histogram.go b/statistics/histogram.go index 6411829b29f8b..658a84b1ed48e 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -362,6 +362,9 @@ func (hg *Histogram) RemoveIdxVals(idxValCntPairs []TopNMeta) { break } totalSubCnt += int64(idxValCntPairs[pairIdx].Count) + if hg.Buckets[bktIdx].NDV > 0 { + hg.Buckets[bktIdx].NDV-- + } pairIdx++ if cmpResult == 0 { hg.Buckets[bktIdx].Repeat = 0 From 73262224188f52e01acb68dcf70e80b7e6cf3e9b Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Tue, 22 Dec 2020 02:03:16 +0800 Subject: [PATCH 5/9] address comments and fix --- session/bootstrap.go | 2 +- statistics/builder.go | 13 +++-- statistics/histogram.go | 54 ++++++------------- statistics/statistics_test.go | 8 +-- statistics/table.go | 3 -- .../unistore/cophandler/cop_handler.go | 2 +- 6 files changed, 27 insertions(+), 55 deletions(-) diff --git a/session/bootstrap.go b/session/bootstrap.go index c0b037dcdcadd..f752e7c048eea 100644 --- a/session/bootstrap.go +++ b/session/bootstrap.go @@ -1240,7 +1240,7 @@ func writeMemoryQuotaQuery(s Session) { } func upgradeToVer57(s Session, ver int64) { - if ver >= version53 { + if ver >= version57 { return } doReentrantDDL(s, "ALTER TABLE mysql.stats_buckets ADD COLUMN `ndv` bigint not null default 0", infoschema.ErrColumnExists) diff --git a/statistics/builder.go b/statistics/builder.go index ce57023d80198..a21b39be7bc87 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -30,6 +30,7 @@ type SortedBuilder struct { Count int64 hist *Histogram statsVer int + needBucketNDV bool } // NewSortedBuilder creates a new SortedBuilder. @@ -39,7 +40,7 @@ func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *ty numBuckets: numBuckets, valuesPerBucket: 1, hist: NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0), - statsVer: statsVer, + needBucketNDV: statsVer == Version2, } } @@ -52,9 +53,7 @@ func (b *SortedBuilder) Hist() *Histogram { func (b *SortedBuilder) Iterate(data types.Datum) error { b.Count++ appendBucket := b.hist.AppendBucket - updateLastBucket := b.hist.updateLastBucket - if b.statsVer == Version2 { - updateLastBucket = b.hist.updateLastBucketV2 + if b.needBucketNDV { appendBucket = func(lower, upper *types.Datum, count, repeat int64) { b.hist.AppendBucketWithNDV(lower, upper, count, repeat, 1) } @@ -76,7 +75,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { b.hist.Buckets[b.bucketIdx].Repeat++ } else if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket { // The bucket still have room to store a new item, update the bucket. - updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) + b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1, b.needBucketNDV) b.hist.NDV++ } else { // All buckets are full, we should merge buckets. @@ -92,7 +91,7 @@ func (b *SortedBuilder) Iterate(data types.Datum) error { } // We may merge buckets, so we should check it again. if b.hist.Buckets[b.bucketIdx].Count+1-b.lastNumber <= b.valuesPerBucket { - updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1) + b.hist.updateLastBucket(&data, b.hist.Buckets[b.bucketIdx].Count+1, 1, b.needBucketNDV) } else { b.lastNumber = b.hist.Buckets[b.bucketIdx].Count b.bucketIdx++ @@ -160,7 +159,7 @@ func BuildColumnHist(ctx sessionctx.Context, numBuckets, id int64, collector *Sa } } else if totalCount-float64(lastCount) <= valuesPerBucket { // The bucket still have room to store a new item, update the bucket. - hg.updateLastBucket(&samples[i].Value, int64(totalCount), int64(ndvFactor)) + hg.updateLastBucket(&samples[i].Value, int64(totalCount), int64(ndvFactor), false) } else { lastCount = hg.Buckets[bucketIdx].Count // The bucket is full, store the item in the next bucket. diff --git a/statistics/histogram.go b/statistics/histogram.go index 658a84b1ed48e..02b775ce47004 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -226,21 +226,16 @@ func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, hg.Bounds.AppendDatum(0, upper) } -func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64) { - len := hg.Len() - hg.Bounds.TruncateTo(2*len - 1) - hg.Bounds.AppendDatum(0, upper) - hg.Buckets[len-1].Count = count - hg.Buckets[len-1].Repeat = repeat -} - -func (hg *Histogram) updateLastBucketV2(upper *types.Datum, count, repeat int64) { - hg.updateLastBucket(upper, count, repeat) +func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64, needBucketNDV bool) { l := hg.Len() + hg.Bounds.TruncateTo(2*l-1) + hg.Bounds.AppendDatum(0, upper) // The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here. - if hg.Buckets[l-1].NDV > 0 { + if needBucketNDV && hg.Buckets[l-1].NDV > 0 { hg.Buckets[l-1].NDV++ } + hg.Buckets[l-1].Count = count + hg.Buckets[l-1].Repeat = repeat } // DecodeTo decodes the histogram bucket values into `Tp`. @@ -393,34 +388,14 @@ func (hg *Histogram) ToString(idxCols int) string { } // equalRowCount estimates the row count where the column equals to value. -func (hg *Histogram) equalRowCount(value types.Datum) float64 { - index, match := hg.Bounds.LowerBound(0, &value) - // Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound. - if index%2 == 1 { - if match { - return float64(hg.Buckets[index/2].Repeat) - } - return hg.notNullCount() / float64(hg.NDV) - } - if match { - cmp := chunk.GetCompareFunc(hg.Tp) - if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { - return float64(hg.Buckets[index/2].Repeat) - } - return hg.notNullCount() / float64(hg.NDV) - } - return 0 -} - -// equalRowCountV2 estimates the row count where the column equals to value. -func (hg *Histogram) equalRowCountV2(value types.Datum) float64 { +func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64 { index, match := hg.Bounds.LowerBound(0, &value) // Since we store the lower and upper bound together, if the index is an odd number, then it points to a upper bound. if index%2 == 1 { if match { return float64(hg.Buckets[index/2].Repeat) } - if hg.Buckets[index/2].NDV > 0 { + if hasBucketNDV && hg.Buckets[index/2].NDV > 0 { return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) } return hg.notNullCount() / float64(hg.NDV) @@ -430,7 +405,7 @@ func (hg *Histogram) equalRowCountV2(value types.Datum) float64 { if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { return float64(hg.Buckets[index/2].Repeat) } - if hg.Buckets[index/2].NDV > 0 { + if hasBucketNDV && hg.Buckets[index/2].NDV > 0 { return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) } return hg.notNullCount() / float64(hg.NDV) @@ -439,8 +414,9 @@ func (hg *Histogram) equalRowCountV2(value types.Datum) float64 { } // greaterRowCount estimates the row count where the column greater than value. +// It's deprecated. Only used for test. func (hg *Histogram) greaterRowCount(value types.Datum) float64 { - gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value) + gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value, false) return math.Max(0, gtCount) } @@ -753,7 +729,7 @@ func MergeHistograms(sc *stmtctx.StatementContext, lh *Histogram, rh *Histogram, if rh.Buckets[0].NDV > 0 { lh.Buckets[lLen-1].NDV += rh.Buckets[0].NDV - 1 } - lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat) + lh.updateLastBucket(rh.GetUpper(0), lh.Buckets[lLen-1].Count+rh.Buckets[0].Count, rh.Buckets[0].Repeat, false) offset = rh.Buckets[0].Count rh.popFirstBucket() } @@ -925,7 +901,7 @@ func (c *Column) equalRowCount(sc *stmtctx.StatementContext, val types.Datum, mo count, err := queryValue(sc, c.CMSketch, c.TopN, val) return float64(count), errors.Trace(err) } - return c.Histogram.equalRowCount(val), nil + return c.Histogram.equalRowCount(val, false), nil } // GetColumnRowCount estimates the row count by a slice of Range. @@ -1082,9 +1058,9 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 { if found { return float64(count) } - return idx.Histogram.equalRowCountV2(val) + return idx.Histogram.equalRowCount(val, true) } - return idx.Histogram.equalRowCount(val) + return idx.Histogram.equalRowCount(val, false) } // QueryBytes is used to query the count of specified bytes. diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 460d7d1a2c44f..837cdae704a26 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -258,7 +258,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(col.Len(), Equals, 226) - count := col.equalRowCount(types.NewIntDatum(1000)) + count := col.equalRowCount(types.NewIntDatum(1000), false) c.Check(int(count), Equals, 0) count = col.lessRowCount(types.NewIntDatum(1000)) c.Check(int(count), Equals, 10000) @@ -270,7 +270,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { c.Check(int(count), Equals, 100000) count = col.greaterRowCount(types.NewIntDatum(200000000)) c.Check(count, Equals, 0.0) - count = col.equalRowCount(types.NewIntDatum(200000000)) + count = col.equalRowCount(types.NewIntDatum(200000000), false) c.Check(count, Equals, 0.0) count = col.BetweenRowCount(types.NewIntDatum(3000), types.NewIntDatum(3500)) c.Check(int(count), Equals, 4994) @@ -300,7 +300,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(int(tblCount), Equals, 100000) - count = col.equalRowCount(encodeKey(types.NewIntDatum(10000))) + count = col.equalRowCount(encodeKey(types.NewIntDatum(10000)), false) c.Check(int(count), Equals, 1) count = col.lessRowCount(encodeKey(types.NewIntDatum(20000))) c.Check(int(count), Equals, 19999) @@ -317,7 +317,7 @@ func (s *testStatisticsSuite) TestBuild(c *C) { checkRepeats(c, col) col.PreCalculateScalar() c.Check(int(tblCount), Equals, 100000) - count = col.equalRowCount(types.NewIntDatum(10000)) + count = col.equalRowCount(types.NewIntDatum(10000), false) c.Check(int(count), Equals, 1) count = col.lessRowCount(types.NewIntDatum(20000)) c.Check(int(count), Equals, 20000) diff --git a/statistics/table.go b/statistics/table.go index afaa2afffe083..0d104084a4e55 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -632,9 +632,6 @@ func (coll *HistColl) getEqualCondSelectivity(sc *stmtctx.StatementContext, idx } return outOfRangeEQSelectivity(ndv, coll.ModifyCount, int64(idx.TotalRowCount())), nil } - if coverAll && len(idx.Histogram.Buckets) > 0 && idx.Histogram.Buckets[0].NDV > 0 { - return idx.Histogram.equalRowCount(val), nil - } minRowCount, crossValidationSelectivity, err := coll.crossValidationSelectivity(sc, idx, usedColsLen, idxPointRange) if err != nil { diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go index 1dc48d6f3e657..40cb1a8c7203a 100644 --- a/store/mockstore/unistore/cophandler/cop_handler.go +++ b/store/mockstore/unistore/cophandler/cop_handler.go @@ -147,7 +147,7 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt } return nil } - return buildResp(chunks, closureExec, []int64{}, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) + return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) } func buildDAG(reader *dbreader.DBReader, lockStore *lockstore.MemStore, req *coprocessor.Request) (*dagContext, *tipb.DAGRequest, error) { From da257227f5fcd726e65b99492bbd656d11c0680d Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 23 Dec 2020 14:13:08 +0800 Subject: [PATCH 6/9] address comments --- distsql/select_result.go | 1 - go.mod | 2 +- go.sum | 2 ++ session/bootstrap.go | 1 + statistics/builder.go | 3 +-- statistics/handle/update.go | 6 ++---- statistics/histogram.go | 10 +++++----- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/distsql/select_result.go b/distsql/select_result.go index 7208928732f60..fd1cd7c846931 100644 --- a/distsql/select_result.go +++ b/distsql/select_result.go @@ -147,7 +147,6 @@ func (r *selectResult) fetchResp(ctx context.Context) error { sc.AppendWarning(dbterror.ClassTiKV.Synthesize(terror.ErrCode(warning.Code), warning.Msg)) } if r.feedback != nil { - // logutil.BgLogger().Warn("select resp", zap.Int64s("output cnt", r.selectResp.OutputCounts), zap.Int64s("ndvs", r.selectResp.Ndvs)) r.feedback.Update(resultSubset.GetStartKey(), r.selectResp.OutputCounts, r.selectResp.Ndvs) } r.partialCount++ diff --git a/go.mod b/go.mod index 5d618f226c23f..3fe6961f639b7 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,7 @@ require ( github.com/pingcap/sysutil v0.0.0-20201130064824-f0c8aa6a6966 github.com/pingcap/tidb-lightning v4.0.9-0.20201106041742-a1ac97827a27+incompatible github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible - github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92 + github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4 github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.9.1 diff --git a/go.sum b/go.sum index 3aa9291da5399..7fd465e28e730 100644 --- a/go.sum +++ b/go.sum @@ -715,6 +715,8 @@ github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217 h1:Ophn4Ud/QHp1BH0FJO github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92 h1:+EomCEPnE5MI0HD10wyoiYj1At57midQ4TagtvV9bmY= github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= +github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4 h1:x64INZ8imEXO3MFcWD99lYlp52V9ZdYrxj74ynfyg3c= +github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tiup v1.2.3 h1:8OCQF7sHhT6VqE8pZU1JTSogPA90OFuWWM/B746x0YY= github.com/pingcap/tiup v1.2.3/go.mod h1:q8WzflNHjE1U49k2qstTL0clx2pKh8pkOzUFV4RTvQo= github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= diff --git a/session/bootstrap.go b/session/bootstrap.go index d361089102319..413c143b9eca7 100644 --- a/session/bootstrap.go +++ b/session/bootstrap.go @@ -509,6 +509,7 @@ var ( upgradeToVer56, upgradeToVer57, upgradeToVer58, + upgradeToVer59, } ) diff --git a/statistics/builder.go b/statistics/builder.go index a21b39be7bc87..c58c7dc286428 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -29,7 +29,6 @@ type SortedBuilder struct { bucketIdx int64 Count int64 hist *Histogram - statsVer int needBucketNDV bool } @@ -40,7 +39,7 @@ func NewSortedBuilder(sc *stmtctx.StatementContext, numBuckets, id int64, tp *ty numBuckets: numBuckets, valuesPerBucket: 1, hist: NewHistogram(id, 0, 0, 0, tp, int(numBuckets), 0), - needBucketNDV: statsVer == Version2, + needBucketNDV: statsVer == Version2, } } diff --git a/statistics/handle/update.go b/statistics/handle/update.go index b27923f6da3a5..409e06efcbda0 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -162,9 +162,9 @@ func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSi var ( // MinLogScanCount is the minimum scan count for a feedback to be logged. - MinLogScanCount = int64(1) + MinLogScanCount = int64(1000) // MinLogErrorRate is the minimum error rate for a feedback to be logged. - MinLogErrorRate = 0.0 + MinLogErrorRate = 0.5 ) // StoreQueryFeedback merges the feedback into stats collector. @@ -549,7 +549,6 @@ func (h *Handle) DumpFeedbackToKV(fb *statistics.QueryFeedback) error { // feedback locally on this tidb-server, so it could be used more timely. func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { h.sweepList() - logutil.BgLogger().Warn("local feedback update") for _, fbs := range h.feedback.Feedbacks { for _, fb := range fbs { h.mu.Lock() @@ -565,7 +564,6 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { if !ok || idx.Histogram.Len() == 0 { continue } - logutil.BgLogger().Warn("local feedback update index") newIdx := *idx eqFB, ranFB := statistics.SplitFeedbackByQueryType(fb.Feedback) // For StatsVersion higher than Version1, the topn is extracted out of histogram. So we don't update the histogram if the feedback overlaps with some topn. diff --git a/statistics/histogram.go b/statistics/histogram.go index a1dc7cd7110b8..8e4ba627cfb61 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -225,7 +225,7 @@ func (hg *Histogram) AppendBucketWithNDV(lower *types.Datum, upper *types.Datum, func (hg *Histogram) updateLastBucket(upper *types.Datum, count, repeat int64, needBucketNDV bool) { l := hg.Len() - hg.Bounds.TruncateTo(2*l-1) + hg.Bounds.TruncateTo(2*l - 1) hg.Bounds.AppendDatum(0, upper) // The sampling case doesn't hold NDV since the low sampling rate. So check the NDV here. if needBucketNDV && hg.Buckets[l-1].NDV > 0 { @@ -392,8 +392,8 @@ func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64 if match { return float64(hg.Buckets[index/2].Repeat) } - if hasBucketNDV && hg.Buckets[index/2].NDV > 0 { - return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) + if hasBucketNDV && hg.Buckets[index/2].NDV > 1 { + return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1) } return hg.notNullCount() / float64(hg.NDV) } @@ -402,8 +402,8 @@ func (hg *Histogram) equalRowCount(value types.Datum, hasBucketNDV bool) float64 if cmp(hg.Bounds.GetRow(index), 0, hg.Bounds.GetRow(index+1), 0) == 0 { return float64(hg.Buckets[index/2].Repeat) } - if hasBucketNDV && hg.Buckets[index/2].NDV > 0 { - return float64(hg.bucketCount(index/2)) / float64(hg.Buckets[index/2].NDV) + if hasBucketNDV && hg.Buckets[index/2].NDV > 1 { + return float64(hg.bucketCount(index/2)-hg.Buckets[index/2].Repeat) / float64(hg.Buckets[index/2].NDV-1) } return hg.notNullCount() / float64(hg.NDV) } From 1bf01759e6260e85846a7d94d94b365b0144d5ff Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 23 Dec 2020 21:40:36 +0800 Subject: [PATCH 7/9] fix tests and adderss comments --- .../r/explain_complex_stats.result | 16 +-- cmd/explaintest/r/explain_easy_stats.result | 22 ++--- cmd/explaintest/r/explain_indexmerge.result | 6 +- cmd/explaintest/r/explain_join_stats.result | 18 ++-- executor/analyze_test.go | 97 ++++++++++--------- executor/show_stats_test.go | 24 ++--- planner/core/cbo_test.go | 8 +- planner/core/testdata/analyze_suite_out.json | 16 ++- .../core/testdata/integration_suite_out.json | 4 +- statistics/feedback.go | 15 +-- statistics/handle/update_test.go | 92 +++++++++--------- statistics/histogram.go | 2 +- statistics/histogram_test.go | 28 +++--- statistics/statistics_test.go | 2 +- statistics/testdata/stats_suite_out.json | 8 +- .../unistore/cophandler/cop_handler.go | 5 + util/testkit/testkit.go | 2 +- 17 files changed, 183 insertions(+), 182 deletions(-) diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 7cacac73febc0..aed18d787a36c 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -115,14 +115,14 @@ PRIMARY KEY (aid,dic) load stats 's/explain_complex_stats_rr.json'; explain SELECT ds, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(dic) as install_device FROM dt use index (cm) WHERE (ds >= '2016-09-01') AND (ds <= '2016-11-03') AND (cm IN ('1062', '1086', '1423', '1424', '1425', '1426', '1427', '1428', '1429', '1430', '1431', '1432', '1433', '1434', '1435', '1436', '1437', '1438', '1439', '1440', '1441', '1442', '1443', '1444', '1445', '1446', '1447', '1448', '1449', '1450', '1451', '1452', '1488', '1489', '1490', '1491', '1492', '1493', '1494', '1495', '1496', '1497', '1550', '1551', '1552', '1553', '1554', '1555', '1556', '1557', '1558', '1559', '1597', '1598', '1599', '1600', '1601', '1602', '1603', '1604', '1605', '1606', '1607', '1608', '1609', '1610', '1611', '1612', '1613', '1614', '1615', '1616', '1623', '1624', '1625', '1626', '1627', '1628', '1629', '1630', '1631', '1632', '1709', '1719', '1720', '1843', '2813', '2814', '2815', '2816', '2817', '2818', '2819', '2820', '2821', '2822', '2823', '2824', '2825', '2826', '2827', '2828', '2829', '2830', '2831', '2832', '2833', '2834', '2835', '2836', '2837', '2838', '2839', '2840', '2841', '2842', '2843', '2844', '2845', '2846', '2847', '2848', '2849', '2850', '2851', '2852', '2853', '2854', '2855', '2856', '2857', '2858', '2859', '2860', '2861', '2862', '2863', '2864', '2865', '2866', '2867', '2868', '2869', '2870', '2871', '2872', '3139', '3140', '3141', '3142', '3143', '3144', '3145', '3146', '3147', '3148', '3149', '3150', '3151', '3152', '3153', '3154', '3155', '3156', '3157', '3158', '3386', '3387', '3388', '3389', '3390', '3391', '3392', '3393', '3394', '3395', '3664', '3665', '3666', '3667', '3668', '3670', '3671', '3672', '3673', '3674', '3676', '3677', '3678', '3679', '3680', '3681', '3682', '3683', '3684', '3685', '3686', '3687', '3688', '3689', '3690', '3691', '3692', '3693', '3694', '3695', '3696', '3697', '3698', '3699', '3700', '3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714', '3715', '3960', '3961', '3962', '3963', '3964', '3965', '3966', '3967', '3968', '3978', '3979', '3980', '3981', '3982', '3983', '3984', '3985', '3986', '3987', '4208', '4209', '4210', '4211', '4212', '4304', '4305', '4306', '4307', '4308', '4866', '4867', '4868', '4869', '4870', '4871', '4872', '4873', '4874', '4875')) GROUP BY ds, p1, p2, p3, p4, p5, p6_md5, p7_md5 ORDER BY ds2 DESC; id estRows task access object operator info -Projection_7 308.93 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 -└─Sort_8 308.93 root test.dt.ds2:desc - └─HashAgg_16 308.93 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 - └─IndexLookUp_17 308.93 root - ├─IndexRangeScan_13(Build) 1841.60 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false - └─HashAgg_11(Probe) 308.93 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 - └─Selection_15 309.39 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) - └─TableRowIDScan_14 1841.60 cop[tikv] table:dt keep order:false +Projection_7 21.53 root test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, Column#21 +└─Sort_8 21.53 root test.dt.ds2:desc + └─HashAgg_16 21.53 root group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(Column#32)->Column#21, funcs:firstrow(test.dt.ds)->test.dt.ds, funcs:firstrow(Column#34)->test.dt.ds2, funcs:firstrow(test.dt.p1)->test.dt.p1, funcs:firstrow(test.dt.p2)->test.dt.p2, funcs:firstrow(test.dt.p3)->test.dt.p3, funcs:firstrow(test.dt.p4)->test.dt.p4, funcs:firstrow(test.dt.p5)->test.dt.p5, funcs:firstrow(test.dt.p6_md5)->test.dt.p6_md5, funcs:firstrow(test.dt.p7_md5)->test.dt.p7_md5 + └─IndexLookUp_17 21.53 root + ├─IndexRangeScan_13(Build) 128.32 cop[tikv] table:dt, index:cm(cm) range:[1062,1062], [1086,1086], [1423,1423], [1424,1424], [1425,1425], [1426,1426], [1427,1427], [1428,1428], [1429,1429], [1430,1430], [1431,1431], [1432,1432], [1433,1433], [1434,1434], [1435,1435], [1436,1436], [1437,1437], [1438,1438], [1439,1439], [1440,1440], [1441,1441], [1442,1442], [1443,1443], [1444,1444], [1445,1445], [1446,1446], [1447,1447], [1448,1448], [1449,1449], [1450,1450], [1451,1451], [1452,1452], [1488,1488], [1489,1489], [1490,1490], [1491,1491], [1492,1492], [1493,1493], [1494,1494], [1495,1495], [1496,1496], [1497,1497], [1550,1550], [1551,1551], [1552,1552], [1553,1553], [1554,1554], [1555,1555], [1556,1556], [1557,1557], [1558,1558], [1559,1559], [1597,1597], [1598,1598], [1599,1599], [1600,1600], [1601,1601], [1602,1602], [1603,1603], [1604,1604], [1605,1605], [1606,1606], [1607,1607], [1608,1608], [1609,1609], [1610,1610], [1611,1611], [1612,1612], [1613,1613], [1614,1614], [1615,1615], [1616,1616], [1623,1623], [1624,1624], [1625,1625], [1626,1626], [1627,1627], [1628,1628], [1629,1629], [1630,1630], [1631,1631], [1632,1632], [1709,1709], [1719,1719], [1720,1720], [1843,1843], [2813,2813], [2814,2814], [2815,2815], [2816,2816], [2817,2817], [2818,2818], [2819,2819], [2820,2820], [2821,2821], [2822,2822], [2823,2823], [2824,2824], [2825,2825], [2826,2826], [2827,2827], [2828,2828], [2829,2829], [2830,2830], [2831,2831], [2832,2832], [2833,2833], [2834,2834], [2835,2835], [2836,2836], [2837,2837], [2838,2838], [2839,2839], [2840,2840], [2841,2841], [2842,2842], [2843,2843], [2844,2844], [2845,2845], [2846,2846], [2847,2847], [2848,2848], [2849,2849], [2850,2850], [2851,2851], [2852,2852], [2853,2853], [2854,2854], [2855,2855], [2856,2856], [2857,2857], [2858,2858], [2859,2859], [2860,2860], [2861,2861], [2862,2862], [2863,2863], [2864,2864], [2865,2865], [2866,2866], [2867,2867], [2868,2868], [2869,2869], [2870,2870], [2871,2871], [2872,2872], [3139,3139], [3140,3140], [3141,3141], [3142,3142], [3143,3143], [3144,3144], [3145,3145], [3146,3146], [3147,3147], [3148,3148], [3149,3149], [3150,3150], [3151,3151], [3152,3152], [3153,3153], [3154,3154], [3155,3155], [3156,3156], [3157,3157], [3158,3158], [3386,3386], [3387,3387], [3388,3388], [3389,3389], [3390,3390], [3391,3391], [3392,3392], [3393,3393], [3394,3394], [3395,3395], [3664,3664], [3665,3665], [3666,3666], [3667,3667], [3668,3668], [3670,3670], [3671,3671], [3672,3672], [3673,3673], [3674,3674], [3676,3676], [3677,3677], [3678,3678], [3679,3679], [3680,3680], [3681,3681], [3682,3682], [3683,3683], [3684,3684], [3685,3685], [3686,3686], [3687,3687], [3688,3688], [3689,3689], [3690,3690], [3691,3691], [3692,3692], [3693,3693], [3694,3694], [3695,3695], [3696,3696], [3697,3697], [3698,3698], [3699,3699], [3700,3700], [3701,3701], [3702,3702], [3703,3703], [3704,3704], [3705,3705], [3706,3706], [3707,3707], [3708,3708], [3709,3709], [3710,3710], [3711,3711], [3712,3712], [3713,3713], [3714,3714], [3715,3715], [3960,3960], [3961,3961], [3962,3962], [3963,3963], [3964,3964], [3965,3965], [3966,3966], [3967,3967], [3968,3968], [3978,3978], [3979,3979], [3980,3980], [3981,3981], [3982,3982], [3983,3983], [3984,3984], [3985,3985], [3986,3986], [3987,3987], [4208,4208], [4209,4209], [4210,4210], [4211,4211], [4212,4212], [4304,4304], [4305,4305], [4306,4306], [4307,4307], [4308,4308], [4866,4866], [4867,4867], [4868,4868], [4869,4869], [4870,4870], [4871,4871], [4872,4872], [4873,4873], [4874,4874], [4875,4875], keep order:false + └─HashAgg_11(Probe) 21.53 cop[tikv] group by:test.dt.ds, test.dt.p1, test.dt.p2, test.dt.p3, test.dt.p4, test.dt.p5, test.dt.p6_md5, test.dt.p7_md5, funcs:count(test.dt.dic)->Column#32, funcs:firstrow(test.dt.ds2)->Column#34 + └─Selection_15 21.56 cop[tikv] ge(test.dt.ds, 2016-09-01 00:00:00.000000), le(test.dt.ds, 2016-11-03 00:00:00.000000) + └─TableRowIDScan_14 128.32 cop[tikv] table:dt keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext, gad.t as gtime from st gad join (select id, aid, pt, dic, ip, t from dd where pt = 'android' and bm = 0 and t > 1478143908) sdk on gad.aid = sdk.aid and gad.ip = sdk.ip and sdk.t > gad.t where gad.t > 1478143908 and gad.bm = 0 and gad.pt = 'android' group by gad.aid, sdk.dic limit 2500; id estRows task access object operator info Projection_13 424.00 root test.st.id, test.dd.id, test.st.aid, test.st.cm, test.dd.dic, test.dd.ip, test.dd.t, test.st.p1, test.st.p2, test.st.p3, test.st.p4, test.st.p5, test.st.p6_md5, test.st.p7_md5, test.st.ext, test.st.t diff --git a/cmd/explaintest/r/explain_easy_stats.result b/cmd/explaintest/r/explain_easy_stats.result index a0d1e57cb8379..c3a46f969837b 100644 --- a/cmd/explaintest/r/explain_easy_stats.result +++ b/cmd/explaintest/r/explain_easy_stats.result @@ -42,16 +42,16 @@ TableReader_6 1999.00 root data:TableRangeScan_5 └─TableRangeScan_5 1999.00 cop[tikv] table:t1 range:(0,+inf], keep order:false explain select t1.c1, t1.c2 from t1 where t1.c2 = 1; id estRows task access object operator info -IndexReader_6 8.00 root index:IndexRangeScan_5 -└─IndexRangeScan_5 8.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false +IndexReader_6 0.00 root index:IndexRangeScan_5 +└─IndexRangeScan_5 0.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false explain select * from t1 left join t2 on t1.c2 = t2.c1 where t1.c1 > 1; id estRows task access object operator info HashJoin_22 2481.25 root left outer join, equal:[eq(test.t1.c2, test.t2.c1)] ├─TableReader_36(Build) 1985.00 root data:Selection_35 │ └─Selection_35 1985.00 cop[tikv] not(isnull(test.t2.c1)) │ └─TableFullScan_34 1985.00 cop[tikv] table:t2 keep order:false -└─TableReader_33(Probe) 1991.00 root data:TableRangeScan_32 - └─TableRangeScan_32 1991.00 cop[tikv] table:t1 range:(1,+inf], keep order:false +└─TableReader_33(Probe) 1998.00 root data:TableRangeScan_32 + └─TableRangeScan_32 1998.00 cop[tikv] table:t1 range:(1,+inf], keep order:false explain update t1 set t1.c2 = 2 where t1.c1 = 1; id estRows task access object operator info Update_3 N/A root N/A @@ -59,9 +59,9 @@ Update_3 N/A root N/A explain delete from t1 where t1.c2 = 1; id estRows task access object operator info Delete_4 N/A root N/A -└─IndexLookUp_11 8.00 root - ├─IndexRangeScan_9(Build) 8.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false - └─TableRowIDScan_10(Probe) 8.00 cop[tikv] table:t1 keep order:false +└─IndexLookUp_11 0.00 root + ├─IndexRangeScan_9(Build) 0.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false + └─TableRowIDScan_10(Probe) 0.00 cop[tikv] table:t1 keep order:false explain select count(b.c2) from t1 a, t2 b where a.c1 = b.c2 group by a.c1; id estRows task access object operator info Projection_11 1985.00 root cast(Column#8, bigint(21) BINARY)->Column#7 @@ -80,10 +80,10 @@ TopN_7 1.00 root test.t2.c2, offset:0, count:1 └─TableFullScan_13 1985.00 cop[tikv] table:t2 keep order:false explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1; id estRows task access object operator info -IndexLookUp_11 0.51 root -├─IndexRangeScan_8(Build) 1.00 cop[tikv] table:t1, index:c2(c2) range:(1 1,1 +inf], keep order:false -└─Selection_10(Probe) 0.51 cop[tikv] lt(test.t1.c3, 1) - └─TableRowIDScan_9 1.00 cop[tikv] table:t1 keep order:false +IndexLookUp_11 0.00 root +├─IndexRangeScan_8(Build) 0.00 cop[tikv] table:t1, index:c2(c2) range:(1 1,1 +inf], keep order:false +└─Selection_10(Probe) 0.00 cop[tikv] lt(test.t1.c3, 1) + └─TableRowIDScan_9 0.00 cop[tikv] table:t1 keep order:false explain select * from t1 where c1 = 1 and c2 > 1; id estRows task access object operator info Selection_6 0.50 root gt(test.t1.c2, 1) diff --git a/cmd/explaintest/r/explain_indexmerge.result b/cmd/explaintest/r/explain_indexmerge.result index d7eeb2c6f7bd8..ef7f6cdf80088 100644 --- a/cmd/explaintest/r/explain_indexmerge.result +++ b/cmd/explaintest/r/explain_indexmerge.result @@ -99,11 +99,11 @@ label = "cop" set session tidb_enable_index_merge = off; explain select /*+ use_index_merge(t, primary, tb, tc) */ * from t where a <= 500000 or b <= 1000000 or c <= 3000000; id estRows task access object operator info -IndexMerge_9 3570485.44 root -├─TableRangeScan_5(Build) 532767.00 cop[tikv] table:t range:[-inf,500000], keep order:false +IndexMerge_9 3560000.00 root +├─TableRangeScan_5(Build) 500000.00 cop[tikv] table:t range:[-inf,500000], keep order:false ├─IndexRangeScan_6(Build) 1000000.00 cop[tikv] table:t, index:tb(b) range:[-inf,1000000], keep order:false ├─IndexRangeScan_7(Build) 3000000.00 cop[tikv] table:t, index:tc(c) range:[-inf,3000000], keep order:false -└─TableRowIDScan_8(Probe) 3570485.44 cop[tikv] table:t keep order:false +└─TableRowIDScan_8(Probe) 3560000.00 cop[tikv] table:t keep order:false explain select /*+ use_index_merge(t, tb, tc) */ * from t where b < 50 or c < 5000000; id estRows task access object operator info IndexMerge_8 4999999.00 root diff --git a/cmd/explaintest/r/explain_join_stats.result b/cmd/explaintest/r/explain_join_stats.result index 262c52085f21b..723df63732dbe 100644 --- a/cmd/explaintest/r/explain_join_stats.result +++ b/cmd/explaintest/r/explain_join_stats.result @@ -7,25 +7,21 @@ load stats 's/explain_join_stats_lo.json'; explain select count(*) from e, lo where lo.a=e.a and e.b=22336; id estRows task access object operator info StreamAgg_13 1.00 root funcs:count(1)->Column#5 -└─HashJoin_89 20044.00 root inner join, equal:[eq(test.lo.a, test.e.a)] +└─HashJoin_89 19977.00 root inner join, equal:[eq(test.lo.a, test.e.a)] ├─TableReader_50(Build) 250.00 root data:TableFullScan_49 │ └─TableFullScan_49 250.00 cop[tikv] table:lo keep order:false - └─IndexLookUp_61(Probe) 20044.00 root - ├─IndexRangeScan_58(Build) 20044.00 cop[tikv] table:e, index:idx_b(b) range:[22336,22336], keep order:false - └─Selection_60(Probe) 20044.00 cop[tikv] not(isnull(test.e.a)) - └─TableRowIDScan_59 20044.00 cop[tikv] table:e keep order:false + └─IndexLookUp_61(Probe) 19977.00 root + ├─IndexRangeScan_58(Build) 19977.00 cop[tikv] table:e, index:idx_b(b) range:[22336,22336], keep order:false + └─Selection_60(Probe) 19977.00 cop[tikv] not(isnull(test.e.a)) + └─TableRowIDScan_59 19977.00 cop[tikv] table:e keep order:false explain select /*+ TIDB_INLJ(e) */ count(*) from e, lo where lo.a=e.a and e.b=22336; id estRows task access object operator info StreamAgg_12 1.00 root funcs:count(1)->Column#5 -<<<<<<< HEAD -└─IndexJoin_56 20044.00 root inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a -======= └─IndexJoin_56 19977.00 root inner join, inner:IndexLookUp_55, outer key:test.lo.a, inner key:test.e.a, equal cond:eq(test.lo.a, test.e.a) ->>>>>>> master ├─TableReader_40(Build) 250.00 root data:TableFullScan_39 │ └─TableFullScan_39 250.00 cop[tikv] table:lo keep order:false - └─IndexLookUp_55(Probe) 80.18 root + └─IndexLookUp_55(Probe) 79.91 root ├─Selection_53(Build) 4080.00 cop[tikv] not(isnull(test.e.a)) │ └─IndexRangeScan_51 4080.00 cop[tikv] table:e, index:idx_a(a) range: decided by [eq(test.e.a, test.lo.a)], keep order:false - └─Selection_54(Probe) 80.18 cop[tikv] eq(test.e.b, 22336) + └─Selection_54(Probe) 79.91 cop[tikv] eq(test.e.b, 22336) └─TableRowIDScan_52 4080.00 cop[tikv] table:e keep order:false diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 0c26ef3de110c..d9e654ecd5729 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -461,8 +461,8 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) { tk.MustExec("insert into t2 values (0), (18446744073709551615)") tk.MustExec("analyze table t2") tk.MustQuery("show stats_buckets where table_name = 't2'").Check(testkit.Rows( - "test t2 a 0 0 1 1 0 0", - "test t2 a 0 1 2 1 18446744073709551615 18446744073709551615")) + "test t2 a 0 0 1 1 0 0 0", + "test t2 a 0 1 2 1 18446744073709551615 18446744073709551615 0")) tk.MustExec(`set @@tidb_partition_prune_mode='` + string(variable.StaticOnly) + `'`) tk.MustExec(`create table t3 (id int, v int, primary key(id), index k(v)) partition by hash (id) partitions 4`) @@ -531,6 +531,7 @@ func (s *testSuite1) TestAnalyzeIncremental(c *C) { } func (s *testSuite1) TestAnalyzeIncrementalStreaming(c *C) { + c.Skip("unistore hasn't support streaming yet.") tk := testkit.NewTestKit(c, s.store) tk.MustExec("use test") tk.Se.GetSessionVars().EnableStreaming = true @@ -545,13 +546,13 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) { tk.MustQuery("show stats_buckets").Check(testkit.Rows()) tk.MustExec("insert into t values (1,1)") tk.MustExec("analyze incremental table t index") - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t idx 1 0 1 1 1 1")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t idx 1 0 1 1 1 1 0")) tk.MustExec("insert into t values (2,2)") tk.MustExec("analyze incremental table t index") - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 2 1 2 2", "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t a 0 1 2 1 2 2 0", "test t idx 1 0 1 1 1 1 0", "test t idx 1 1 2 1 2 2 0")) tk.MustExec("analyze incremental table t index") // Result should not change. - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 2 1 2 2", "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t a 0 1 2 1 2 2 0", "test t idx 1 0 1 1 1 1 0", "test t idx 1 1 2 1 2 2 0")) // Test analyze incremental with feedback. tk.MustExec("insert into t values (3,3)") @@ -574,7 +575,7 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) { c.Assert(h.DumpStatsFeedbackToKV(), IsNil) c.Assert(h.HandleUpdateStats(is), IsNil) c.Assert(h.Update(is), IsNil) - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 3 0 2 2147483647", "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t a 0 1 3 0 2 2147483647 0", "test t idx 1 0 1 1 1 1 0", "test t idx 1 1 2 1 2 2 0")) tblStats := h.GetTableStats(tblInfo) val, err := codec.EncodeKey(tk.Se.GetSessionVars().StmtCtx, nil, types.NewIntDatum(3)) c.Assert(err, IsNil) @@ -583,8 +584,8 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) { c.Assert(statistics.IsAnalyzed(tblStats.Columns[tblInfo.Columns[0].ID].Flag), IsFalse) tk.MustExec("analyze incremental table t index") - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 2 1 2 2", "test t a 0 2 3 1 3 3", - "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2", "test t idx 1 2 3 1 3 3")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t a 0 1 2 1 2 2 0", "test t a 0 2 3 1 3 3 0", + "test t idx 1 0 1 1 1 1 0", "test t idx 1 1 2 1 2 2 0", "test t idx 1 2 3 1 3 3 0")) tblStats = h.GetTableStats(tblInfo) c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].QueryBytes(val), Equals, uint64(1)) } @@ -760,36 +761,36 @@ func (s *testSuite1) TestNormalAnalyzeOnCommonHandle(c *C) { tk.MustExec("analyze table t1, t2, t3") tk.MustQuery(`show stats_buckets where table_name in ("t1", "t2", "t3")`).Sort().Check(testkit.Rows( - "test t1 a 0 0 1 1 1 1", - "test t1 a 0 1 2 1 2 2", - "test t1 a 0 2 3 1 3 3", - "test t1 b 0 0 1 1 1 1", - "test t1 b 0 1 2 1 2 2", - "test t1 b 0 2 3 1 3 3", - "test t2 PRIMARY 1 0 1 1 111 111", - "test t2 PRIMARY 1 1 2 1 222 222", - "test t2 PRIMARY 1 2 3 1 333 333", - "test t2 a 0 0 1 1 111 111", - "test t2 a 0 1 2 1 222 222", - "test t2 a 0 2 3 1 333 333", - "test t2 b 0 0 1 1 1 1", - "test t2 b 0 1 2 1 2 2", - "test t2 b 0 2 3 1 3 3", - "test t3 PRIMARY 1 0 1 1 (1, 1) (1, 1)", - "test t3 PRIMARY 1 1 2 1 (2, 2) (2, 2)", - "test t3 PRIMARY 1 2 3 1 (3, 3) (3, 3)", - "test t3 a 0 0 1 1 1 1", - "test t3 a 0 1 2 1 2 2", - "test t3 a 0 2 3 1 3 3", - "test t3 b 0 0 1 1 1 1", - "test t3 b 0 1 2 1 2 2", - "test t3 b 0 2 3 1 3 3", - "test t3 c 0 0 1 1 1 1", - "test t3 c 0 1 2 1 2 2", - "test t3 c 0 2 3 1 3 3", - "test t3 c 1 0 1 1 1 1", - "test t3 c 1 1 2 1 2 2", - "test t3 c 1 2 3 1 3 3")) + "test t1 a 0 0 1 1 1 1 0", + "test t1 a 0 1 2 1 2 2 0", + "test t1 a 0 2 3 1 3 3 0", + "test t1 b 0 0 1 1 1 1 0", + "test t1 b 0 1 2 1 2 2 0", + "test t1 b 0 2 3 1 3 3 0", + "test t2 PRIMARY 1 0 1 1 111 111 0", + "test t2 PRIMARY 1 1 2 1 222 222 0", + "test t2 PRIMARY 1 2 3 1 333 333 0", + "test t2 a 0 0 1 1 111 111 0", + "test t2 a 0 1 2 1 222 222 0", + "test t2 a 0 2 3 1 333 333 0", + "test t2 b 0 0 1 1 1 1 0", + "test t2 b 0 1 2 1 2 2 0", + "test t2 b 0 2 3 1 3 3 0", + "test t3 PRIMARY 1 0 1 1 (1, 1) (1, 1) 0", + "test t3 PRIMARY 1 1 2 1 (2, 2) (2, 2) 0", + "test t3 PRIMARY 1 2 3 1 (3, 3) (3, 3) 0", + "test t3 a 0 0 1 1 1 1 0", + "test t3 a 0 1 2 1 2 2 0", + "test t3 a 0 2 3 1 3 3 0", + "test t3 b 0 0 1 1 1 1 0", + "test t3 b 0 1 2 1 2 2 0", + "test t3 b 0 2 3 1 3 3 0", + "test t3 c 0 0 1 1 1 1 0", + "test t3 c 0 1 2 1 2 2 0", + "test t3 c 0 2 3 1 3 3 0", + "test t3 c 1 0 1 1 1 1 0", + "test t3 c 1 1 2 1 2 2 0", + "test t3 c 1 2 3 1 3 3 0")) } func (s *testSuite1) TestDefaultValForAnalyze(c *C) { @@ -837,15 +838,15 @@ func (s *testSerialSuite2) TestIssue20874(c *C) { tk.MustExec("insert into t values ('#', 'C'), ('$', 'c'), ('a', 'a')") tk.MustExec("analyze table t") tk.MustQuery("show stats_buckets where db_name = 'test' and table_name = 't'").Sort().Check(testkit.Rows( - "test t a 0 0 1 1 \x02\xd2 \x02\xd2", - "test t a 0 1 2 1 \x0e\x0f \x0e\x0f", - "test t a 0 2 3 1 \x0e3 \x0e3", - "test t b 0 0 1 1 \x00A \x00A", - "test t b 0 1 3 2 \x00C \x00C", - "test t idxa 1 0 1 1 \x02\xd2 \x02\xd2", - "test t idxa 1 1 2 1 \x0e\x0f \x0e\x0f", - "test t idxa 1 2 3 1 \x0e3 \x0e3", - "test t idxb 1 0 1 1 \x00A \x00A", - "test t idxb 1 1 3 2 \x00C \x00C", + "test t a 0 0 1 1 \x02\xd2 \x02\xd2 0", + "test t a 0 1 2 1 \x0e\x0f \x0e\x0f 0", + "test t a 0 2 3 1 \x0e3 \x0e3 0", + "test t b 0 0 1 1 \x00A \x00A 0", + "test t b 0 1 3 2 \x00C \x00C 0", + "test t idxa 1 0 1 1 \x02\xd2 \x02\xd2 0", + "test t idxa 1 1 2 1 \x0e\x0f \x0e\x0f 0", + "test t idxa 1 2 3 1 \x0e3 \x0e3 0", + "test t idxb 1 0 1 1 \x00A \x00A 0", + "test t idxb 1 1 3 2 \x00C \x00C 0", )) } diff --git a/executor/show_stats_test.go b/executor/show_stats_test.go index 270c35f5abf2d..f21ada8ea2b1c 100644 --- a/executor/show_stats_test.go +++ b/executor/show_stats_test.go @@ -80,36 +80,36 @@ func (s *testShowStatsSuite) TestShowStatsBuckets(c *C) { tk.MustExec("insert into t values (1,1)") tk.MustExec("analyze table t") result := tk.MustQuery("show stats_buckets").Sort() - result.Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t b 0 0 1 1 1 1", "test t idx 1 0 1 1 (1, 1) (1, 1)")) + result.Check(testkit.Rows("test t a 0 0 1 1 1 1 0", "test t b 0 0 1 1 1 1 0", "test t idx 1 0 1 1 (1, 1) (1, 1) 0")) result = tk.MustQuery("show stats_buckets where column_name = 'idx'") - result.Check(testkit.Rows("test t idx 1 0 1 1 (1, 1) (1, 1)")) + result.Check(testkit.Rows("test t idx 1 0 1 1 (1, 1) (1, 1) 0")) tk.MustExec("drop table t") tk.MustExec("create table t (`a` datetime, `b` int, key `idx`(`a`, `b`))") tk.MustExec("insert into t values (\"2020-01-01\", 1)") tk.MustExec("analyze table t") result = tk.MustQuery("show stats_buckets").Sort() - result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00", "test t b 0 0 1 1 1 1", "test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)")) + result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00 0", "test t b 0 0 1 1 1 1 0", "test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0")) result = tk.MustQuery("show stats_buckets where column_name = 'idx'") - result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)")) + result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0")) tk.MustExec("drop table t") tk.MustExec("create table t (`a` date, `b` int, key `idx`(`a`, `b`))") tk.MustExec("insert into t values (\"2020-01-01\", 1)") tk.MustExec("analyze table t") result = tk.MustQuery("show stats_buckets").Sort() - result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 2020-01-01", "test t b 0 0 1 1 1 1", "test t idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1)")) + result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 2020-01-01 0", "test t b 0 0 1 1 1 1 0", "test t idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1) 0")) result = tk.MustQuery("show stats_buckets where column_name = 'idx'") - result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1)")) + result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01, 1) (2020-01-01, 1) 0")) tk.MustExec("drop table t") tk.MustExec("create table t (`a` timestamp, `b` int, key `idx`(`a`, `b`))") tk.MustExec("insert into t values (\"2020-01-01\", 1)") tk.MustExec("analyze table t") result = tk.MustQuery("show stats_buckets").Sort() - result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00", "test t b 0 0 1 1 1 1", "test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)")) + result.Check(testkit.Rows("test t a 0 0 1 1 2020-01-01 00:00:00 2020-01-01 00:00:00 0", "test t b 0 0 1 1 1 1 0", "test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0")) result = tk.MustQuery("show stats_buckets where column_name = 'idx'") - result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1)")) + result.Check(testkit.Rows("test t idx 1 0 1 1 (2020-01-01 00:00:00, 1) (2020-01-01 00:00:00, 1) 0")) } func (s *testShowStatsSuite) TestShowStatsHasNullValue(c *C) { @@ -124,14 +124,14 @@ func (s *testShowStatsSuite) TestShowStatsHasNullValue(c *C) { tk.MustExec("insert into t values(1)") tk.MustExec("analyze table t") tk.MustQuery("show stats_buckets").Sort().Check(testkit.Rows( - "test t a 0 0 1 1 1 1", - "test t idx 1 0 1 1 1 1", + "test t a 0 0 1 1 1 1 0", + "test t idx 1 0 1 1 1 1 0", )) tk.MustExec("drop table t") tk.MustExec("create table t (a int, b int, index idx(a, b))") tk.MustExec("insert into t values(NULL, NULL)") tk.MustExec("analyze table t") - tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t idx 1 0 1 1 (NULL, NULL) (NULL, NULL)")) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t idx 1 0 1 1 (NULL, NULL) (NULL, NULL) 0")) tk.MustExec("drop table t") tk.MustExec("create table t(a int, b int, c int, index idx_b(b), index idx_c_a(c, a))") @@ -201,7 +201,7 @@ func (s *testShowStatsSuite) TestShowPartitionStats(c *C) { c.Assert(result.Rows()[2][3], Equals, "idx") result = tk.MustQuery("show stats_buckets").Sort() - result.Check(testkit.Rows("test t p0 a 0 0 1 1 1 1", "test t p0 b 0 0 1 1 1 1", "test t p0 idx 1 0 1 1 1 1")) + result.Check(testkit.Rows("test t p0 a 0 0 1 1 1 1 0", "test t p0 b 0 0 1 1 1 1 0", "test t p0 idx 1 0 1 1 1 1 0")) result = tk.MustQuery("show stats_healthy") result.Check(testkit.Rows("test t p0 100")) diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index 49e16dae4102a..fc60c85817d40 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -558,10 +558,10 @@ func (s *testAnalyzeSuite) TestInconsistentEstimation(c *C) { // the `a = 5 and c = 5` will get 10, it is not consistent. tk.MustQuery("explain select * from t use index(ab) where a = 5 and c = 5"). Check(testkit.Rows( - "IndexLookUp_8 7.00 root ", - "├─IndexRangeScan_5(Build) 8.75 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false", - "└─Selection_7(Probe) 7.00 cop[tikv] eq(test.t.c, 5)", - " └─TableRowIDScan_6 8.75 cop[tikv] table:t keep order:false", + "IndexLookUp_8 10.00 root ", + "├─IndexRangeScan_5(Build) 12.50 cop[tikv] table:t, index:ab(a, b) range:[5,5], keep order:false", + "└─Selection_7(Probe) 10.00 cop[tikv] eq(test.t.c, 5)", + " └─TableRowIDScan_6 12.50 cop[tikv] table:t keep order:false", )) } diff --git a/planner/core/testdata/analyze_suite_out.json b/planner/core/testdata/analyze_suite_out.json index 203df046cec34..32d4cb6b49e15 100644 --- a/planner/core/testdata/analyze_suite_out.json +++ b/planner/core/testdata/analyze_suite_out.json @@ -322,7 +322,7 @@ "IndexReader(Index(t.e)[[-inf,10]]->StreamAgg)->StreamAgg", "IndexReader(Index(t.e)[[-inf,50]]->StreamAgg)->StreamAgg", "IndexReader(Index(t.b_c)[[NULL,+inf]]->Sel([gt(test.t.c, 1)])->HashAgg)->HashAgg", - "IndexLookUp(Index(t.e)[[1,1]], Table(t)->HashAgg)->HashAgg", + "IndexLookUp(Index(t.e)[[1,1]], Table(t))->HashAgg", "TableReader(Table(t)->Sel([gt(test.t.e, 1)])->HashAgg)->HashAgg", "IndexLookUp(Index(t.b)[[-inf,20]], Table(t)->HashAgg)->HashAgg", "TableReader(Table(t)->Sel([le(test.t.b, 30)])->StreamAgg)->StreamAgg", @@ -363,20 +363,18 @@ { "SQL": "explain select * from t where a = 7639902", "Plan": [ - "IndexReader_6 499061.16 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 499061.16 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" + "IndexReader_6 6.68 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" ] }, { "SQL": "explain select c, b from t where a = 7639902 order by b asc limit 6", "Plan": [ "Projection_7 6.00 root test.t.c, test.t.b", - "└─Limit_12 6.00 root offset:0, count:6", - " └─Projection_23 6.00 root test.t.a, test.t.b, test.t.c", - " └─IndexLookUp_22 6.00 root ", - " ├─IndexFullScan_19(Build) 600.00 cop[tikv] table:t, index:b(b) keep order:true", - " └─Selection_21(Probe) 6.00 cop[tikv] eq(test.t.a, 7639902)", - " └─TableRowIDScan_20 600.00 cop[tikv] table:t keep order:false" + "└─TopN_8 6.00 root test.t.b, offset:0, count:6", + " └─IndexReader_16 6.00 root index:TopN_15", + " └─TopN_15 6.00 cop[tikv] test.t.b, offset:0, count:6", + " └─IndexRangeScan_14 6.68 cop[tikv] table:t, index:PRIMARY(a, c, b) range:[7639902,7639902], keep order:false" ] } ] diff --git a/planner/core/testdata/integration_suite_out.json b/planner/core/testdata/integration_suite_out.json index feb86015d54fb..9243534ef3042 100644 --- a/planner/core/testdata/integration_suite_out.json +++ b/planner/core/testdata/integration_suite_out.json @@ -926,8 +926,8 @@ { "SQL": "select * from t1 where t1.a = 1 and t1.b < \"333\"", "Plan": [ - "TableReader_6 1.00 root data:TableRangeScan_5", - "└─TableRangeScan_5 1.00 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false" + "TableReader_6 0.67 root data:TableRangeScan_5", + "└─TableRangeScan_5 0.67 cop[tikv] table:t1 range:[1 -inf,1 \"333\"), keep order:false" ], "Res": [ "1 111 1.1000000000 11" diff --git a/statistics/feedback.go b/statistics/feedback.go index 1f042b85d41cc..7e2e4225925ed 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -518,7 +518,6 @@ func (b *BucketFeedback) splitBucket(newNumBkts int, totalCount float64, originB countInNewBkt := originBucketCount * ratio ndvInNewBkt := int64(float64(originalNdv) * ratio) countInNewBkt, ndvInNewBkt = b.refineBucketCount(sc, newBkt, countInNewBkt, ndvInNewBkt) - log.Warn("split bucket", zap.Float64("count", countInNewBkt), zap.Int64("ndv", ndvInNewBkt)) // do not split if the count of result bucket is too small. if countInNewBkt < minBucketFraction*totalCount { bounds[i] = bounds[i-1] @@ -744,18 +743,20 @@ func splitBuckets(h *Histogram, feedback *QueryFeedback) ([]bucket, []bool, int6 // UpdateHistogram updates the histogram according buckets. func UpdateHistogram(h *Histogram, feedback *QueryFeedback, statsVer int) *Histogram { - buckets, isNewBuckets, totalCount := splitBuckets(h, feedback) - ndvs := make([]int64, len(buckets)) - for i := range buckets { - ndvs[i] = buckets[i].Ndv + if statsVer < Version2 { + // If it's the stats we haven't maintain the bucket NDV yet. Reset the ndv. + for i := range feedback.Feedback { + feedback.Feedback[i].Ndv = 0 + } } - log.Warn("update hist", zap.Int64s("ndvs", ndvs)) + buckets, isNewBuckets, totalCount := splitBuckets(h, feedback) buckets = mergeBuckets(buckets, isNewBuckets, float64(totalCount)) hist := buildNewHistogram(h, buckets) // Update the NDV of primary key column. if feedback.Tp == PkType { hist.NDV = int64(hist.TotalRowCount()) - } else if feedback.Tp == IndexType { + // If we maintained the NDV of bucket. We can also update the total ndv. + } else if feedback.Tp == IndexType && statsVer == 2 { totNdv := int64(0) for _, bkt := range buckets { totNdv += bkt.Ndv diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index 3fb665cc2b738..871c078383cd8 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -769,25 +769,25 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) { // test primary key feedback sql: "select * from t where t.a <= 5 order by a desc", hist: "column:1 ndv:4 totColSize:0\n" + - "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" + - "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 2\n" + - "num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 1", + "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" + + "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n" + + "num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0", idxCols: 0, }, { // test index feedback by double read sql: "select * from t use index(idx) where t.b <= 5", - hist: "index:1 ndv:3\n" + - "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" + - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1", + hist: "index:1 ndv:2\n" + + "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n" + + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0", idxCols: 1, }, { // test index feedback by single read sql: "select b from t use index(idx) where t.b <= 5", - hist: "index:1 ndv:3\n" + - "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 2\n" + - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1", + hist: "index:1 ndv:2\n" + + "num: 3 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n" + + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0", idxCols: 1, }, } @@ -889,22 +889,22 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) { // test primary key feedback sql: "select * from t where t.a <= 5", hist: "column:1 ndv:2 totColSize:0\n" + - "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 1\n" + - "num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 1", + "num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" + + "num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0", idxCols: 0, }, { // test index feedback by double read sql: "select * from t use index(idx) where t.b <= 5", hist: "index:1 ndv:1\n" + - "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1", + "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0", idxCols: 1, }, { // test index feedback by single read sql: "select b from t use index(idx) where t.b <= 5", hist: "index:1 ndv:1\n" + - "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 1", + "num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0", idxCols: 1, }, } @@ -1025,8 +1025,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) { h.UpdateStatsByLocalFeedback(s.do.InfoSchema()) tbl := h.GetTableStats(tblInfo) - c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+ - "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+ + c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ + "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+ "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+ "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0") sc := &stmtctx.StatementContext{TimeZone: time.Local} @@ -1036,8 +1036,8 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) { c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint64(2)) c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+ - "num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 1\n"+ - "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 1") + "num: 2 lower_bound: -inf upper_bound: 5 repeats: 0 ndv: 0\n"+ + "num: 1 lower_bound: 5 upper_bound: 5 repeats: 1 ndv: 0") // Test that it won't cause panic after update. testKit.MustQuery("select * from t use index(idx) where b > 0") @@ -1081,8 +1081,8 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) { pid := tblInfo.Partition.Definitions[0].ID tbl := h.GetPartitionStats(tblInfo, pid) - c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:1 totColSize:0\n"+ - "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 1\n"+ + c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ + "num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+ "num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+ "num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0") } @@ -1157,21 +1157,21 @@ func (s *testStatsSuite) TestLogDetailedInfo(c *C) { }{ { sql: "select * from t where t.a <= 15", - result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" + - "[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}", + result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}" + + "[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}", }, { sql: "select * from t use index(idx) where t.b <= 15", - result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 8, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8}" + - "[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 8, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 4}", + result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0}" + + "[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1 ndv: 0, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1 ndv: 0}", }, { sql: "select b from t use index(idx_ba) where b = 1 and a <= 5", - result: "[stats-feedback] test.t, index=idx_ba, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}", + result: "[stats-feedback] test.t, index=idx_ba, actual=1, equality=1, expected equality=1, range=range: [-inf,6], actual: -1, expected: 6, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1 ndv: 0}", }, { sql: "select b from t use index(idx_bc) where b = 1 and c <= 5", - result: "[stats-feedback] test.t, index=idx_bc, rangeStr=range: [1 -inf,1 6), actual: 1, expected: 0, histogram: {num: 8 lower_bound: (0, 0) upper_bound: (7, 7) repeats: 1 ndv: 8}", + result: "[stats-feedback] test.t, index=idx_bc, actual=1, equality=1, expected equality=1, range=[-inf,6], pseudo count=7", }, { sql: "select b from t use index(idx_ba) where b = 1", @@ -1523,9 +1523,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) { // The real count of `a = 1` is 0. sql: "select * from t where a = 1 and b < 21", hist: "column:2 ndv:20 totColSize:20\n" + - "num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" + - "num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" + - "num: 4 lower_bound: 14 upper_bound: 21 repeats: 0", + "num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" + + "num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" + + "num: 4 lower_bound: 14 upper_bound: 21 repeats: 0 ndv: 0", rangeID: tblInfo.Columns[1].ID, idxID: tblInfo.Indices[0].ID, eqCount: 3, @@ -1534,9 +1534,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) { // The real count of `b > 10` is 0. sql: "select * from t where a = 2 and b > 10", hist: "column:2 ndv:20 totColSize:20\n" + - "num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" + - "num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" + - "num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0", + "num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0 ndv: 0\n" + + "num: 4 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" + + "num: 5 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0 ndv: 0", rangeID: tblInfo.Columns[1].ID, idxID: tblInfo.Indices[0].ID, eqCount: 3, @@ -1594,25 +1594,25 @@ func (s *testStatsSuite) TestFeedbackRanges(c *C) { { sql: "select * from t where a <= 50 or (a > 130 and a < 140)", hist: "column:1 ndv:30 totColSize:0\n" + - "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", + "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0", colID: 1, }, { sql: "select * from t where a >= 10", hist: "column:1 ndv:30 totColSize:0\n" + - "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 8\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + - "num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 14", + "num: 8 lower_bound: -128 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 127 repeats: 0 ndv: 0", colID: 1, }, { sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))", hist: "column:2 ndv:20 totColSize:30\n" + - "num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 8\n" + - "num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 8\n" + - "num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 7", + "num: 8 lower_bound: -128 upper_bound: 7 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 7 upper_bound: 14 repeats: 0 ndv: 0\n" + + "num: 7 lower_bound: 14 upper_bound: 51 repeats: 0 ndv: 0", colID: 2, }, } @@ -1674,9 +1674,9 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) { { sql: "select * from t where a <= 50", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0", tblName: "t", }, { @@ -1690,9 +1690,9 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) { { sql: "select * from t1 where a <= 50", hist: "column:1 ndv:30 totColSize:10\n" + - "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 8\n" + - "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 8\n" + - "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 14", + "num: 8 lower_bound: 0 upper_bound: 8 repeats: 0 ndv: 0\n" + + "num: 8 lower_bound: 8 upper_bound: 16 repeats: 0 ndv: 0\n" + + "num: 14 lower_bound: 16 upper_bound: 50 repeats: 0 ndv: 0", tblName: "t1", }, { diff --git a/statistics/histogram.go b/statistics/histogram.go index 8e4ba627cfb61..9b30432f0d857 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -1032,7 +1032,7 @@ func (idx *Index) equalRowCount(b []byte, modifyCount int64) float64 { if idx.NDV > 0 && idx.outOfRange(val) { return outOfRangeEQSelectivity(idx.NDV, modifyCount, int64(idx.TotalRowCount())) * idx.TotalRowCount() } - if idx.CMSketch != nil && idx.StatsVer == Version1 { + if idx.CMSketch != nil && idx.StatsVer < Version2 { return float64(idx.QueryBytes(b)) } // If it's version2, query the top-n first. diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go index cd0196501a1d4..b017fe1bcf0f8 100644 --- a/statistics/histogram_test.go +++ b/statistics/histogram_test.go @@ -49,11 +49,11 @@ func (s *testStatisticsSuite) TestNewHistogramBySelectivity(c *C) { node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(13), HighVal: types.MakeDatums(13)}) node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}}) intColResult := `column:1 ndv:16 totColSize:0 -num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 -num: 11 lower_bound: 6 upper_bound: 8 repeats: 0 -num: 30 lower_bound: 9 upper_bound: 11 repeats: 0 -num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 -num: 30 lower_bound: 27 upper_bound: 29 repeats: 0` +num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0 +num: 11 lower_bound: 6 upper_bound: 8 repeats: 0 ndv: 0 +num: 30 lower_bound: 9 upper_bound: 11 repeats: 0 ndv: 0 +num: 1 lower_bound: 12 upper_bound: 14 repeats: 0 ndv: 0 +num: 30 lower_bound: 27 upper_bound: 29 repeats: 0 ndv: 0` stringCol := &Column{} stringCol.Histogram = *NewHistogram(2, 15, 30, 0, types.NewFieldType(mysql.TypeString), chunk.InitialCapacity, 0) @@ -82,11 +82,11 @@ num: 30 lower_bound: 27 upper_bound: 29 repeats: 0` node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ddd"), HighVal: types.MakeDatums("fff")}) node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ggg"), HighVal: []types.Datum{types.MaxValueDatum()}}) stringColResult := `column:2 ndv:9 totColSize:0 -num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0 -num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0 -num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0 -num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0 -num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0` +num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0 ndv: 0 +num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0 ndv: 0 +num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0 ndv: 0 +num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0 ndv: 0 +num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0 ndv: 0` newColl := coll.NewHistCollBySelectivity(sc, []*StatsNode{node, node2}) c.Assert(newColl.Columns[1].String(), Equals, intColResult) @@ -110,10 +110,10 @@ num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0` node3.Ranges = append(node3.Ranges, &ranger.Range{LowVal: types.MakeDatums(10), HighVal: types.MakeDatums(13)}) idxResult := `index:0 ndv:7 -num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 -num: 30 lower_bound: 3 upper_bound: 5 repeats: 10 -num: 30 lower_bound: 9 upper_bound: 11 repeats: 10 -num: 30 lower_bound: 12 upper_bound: 14 repeats: 10` +num: 30 lower_bound: 0 upper_bound: 2 repeats: 10 ndv: 0 +num: 30 lower_bound: 3 upper_bound: 5 repeats: 10 ndv: 0 +num: 30 lower_bound: 9 upper_bound: 11 repeats: 10 ndv: 0 +num: 30 lower_bound: 12 upper_bound: 14 repeats: 10 ndv: 0` newColl = coll.NewHistCollBySelectivity(sc, []*StatsNode{node3}) c.Assert(newColl.Indices[0].String(), Equals, idxResult) diff --git a/statistics/statistics_test.go b/statistics/statistics_test.go index 837cdae704a26..b755d58699581 100644 --- a/statistics/statistics_test.go +++ b/statistics/statistics_test.go @@ -680,5 +680,5 @@ func (s *testStatisticsSuite) TestIndexRanges(c *C) { ran[0].HighVal[0] = types.NewIntDatum(1000) count, err = tbl.GetRowCountByIndexRanges(sc, 0, ran) c.Assert(err, IsNil) - c.Assert(int(count), Equals, 3) + c.Assert(int(count), Equals, 0) } diff --git a/statistics/testdata/stats_suite_out.json b/statistics/testdata/stats_suite_out.json index 59ec718e6f8f2..2a9895c8cc238 100644 --- a/statistics/testdata/stats_suite_out.json +++ b/statistics/testdata/stats_suite_out.json @@ -60,8 +60,8 @@ "Name": "TestDiscreteDistribution", "Cases": [ [ - "IndexReader_6 1.02 root index:IndexRangeScan_5", - "└─IndexRangeScan_5 1.02 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false" + "IndexReader_6 0.00 root index:IndexRangeScan_5", + "└─IndexRangeScan_5 0.00 cop[tikv] table:t, index:idx(a, b) range:[\"tw\" -inf,\"tw\" 0), keep order:false" ] ] }, @@ -92,8 +92,8 @@ "Name": "TestCollationColumnEstimate", "Cases": [ [ - "test t a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 1", - "test t a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 1" + "test t a 0 0 2 2 \u0000A\u0000A\u0000A \u0000A\u0000A\u0000A 0", + "test t a 0 1 4 2 \u0000B\u0000B\u0000B \u0000B\u0000B\u0000B 0" ], [ "TableReader_7 2.00 root data:Selection_6", diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go index 40cb1a8c7203a..ba81ee4b63c4c 100644 --- a/store/mockstore/unistore/cophandler/cop_handler.go +++ b/store/mockstore/unistore/cophandler/cop_handler.go @@ -38,8 +38,10 @@ import ( "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/collate" + "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/rowcodec" "github.com/pingcap/tipb/go-tipb" + "go.uber.org/zap" ) // MPPCtx is the mpp execution context @@ -147,6 +149,9 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt } return nil } + if dagReq.CollectRangeCounts != nil && *dagReq.CollectRangeCounts == true { + logutil.BgLogger().Warn("unistore exec", zap.Int("ndv len", len(closureExec.ndvs))) + } return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) } diff --git a/util/testkit/testkit.go b/util/testkit/testkit.go index a3437f3a27a97..1b52f78549678 100644 --- a/util/testkit/testkit.go +++ b/util/testkit/testkit.go @@ -319,7 +319,7 @@ func (tk *TestKit) ResultSetToResult(rs sqlexec.RecordSet, comment check.Comment // ResultSetToResultWithCtx converts sqlexec.RecordSet to testkit.Result. func (tk *TestKit) ResultSetToResultWithCtx(ctx context.Context, rs sqlexec.RecordSet, comment check.CommentInterface) *Result { sRows, err := session.ResultSetToStringSlice(ctx, tk.Se, rs) - tk.c.Check(err, check.IsNil, comment) + tk.c.Check(errors.ErrorStack(err), check.Equals, "", comment) return &Result{rows: sRows, c: tk.c, comment: comment} } From 878b5cac44174a74f47ea2c1005124c3f30f851e Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Thu, 24 Dec 2020 00:15:09 +0800 Subject: [PATCH 8/9] fix go mod tidy --- go.sum | 10 ++++++---- store/mockstore/unistore/cophandler/closure_exec.go | 2 +- store/mockstore/unistore/cophandler/cop_handler.go | 5 ----- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/go.sum b/go.sum index 7fd465e28e730..d1835d3b7fea9 100644 --- a/go.sum +++ b/go.sum @@ -711,10 +711,6 @@ github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM= github.com/pingcap/tipb v0.0.0-20190428032612-535e1abaa330/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tipb v0.0.0-20200417094153-7316d94df1ee/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= -github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217 h1:Ophn4Ud/QHp1BH0FJOzbAVBW9Mw8BlX0gtWkK7ubDy0= -github.com/pingcap/tipb v0.0.0-20201209065231-aa39b1b86217/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= -github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92 h1:+EomCEPnE5MI0HD10wyoiYj1At57midQ4TagtvV9bmY= -github.com/pingcap/tipb v0.0.0-20201210091214-70edbc366d92/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4 h1:x64INZ8imEXO3MFcWD99lYlp52V9ZdYrxj74ynfyg3c= github.com/pingcap/tipb v0.0.0-20201215091753-bd0cb2b314a4/go.mod h1:RtkHW8WbcNxj8lsbzjaILci01CtYnYbIkQhjyZWrWVI= github.com/pingcap/tiup v1.2.3 h1:8OCQF7sHhT6VqE8pZU1JTSogPA90OFuWWM/B746x0YY= @@ -819,6 +815,7 @@ github.com/snowflakedb/gosnowflake v1.3.4/go.mod h1:NsRq2QeiMUuoNUJhp5Q6xGC4uBrs github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= @@ -971,6 +968,7 @@ go.uber.org/automaxprocs v1.2.0 h1:+RUihKM+nmYUoB9w0D0Ov5TJ2PpFO2FgenTxMJiZBZA= go.uber.org/automaxprocs v1.2.0/go.mod h1:YfO3fm683kQpzETxlTGZhGIVmXAhaw3gxeBADbpZtnU= go.uber.org/dig v1.8.0/go.mod h1:X34SnWGr8Fyla9zQNO2GSO2D+TIuqB14OS8JhYocIyw= go.uber.org/fx v1.10.0/go.mod h1:vLRicqpG/qQEzno4SYU86iCwfT95EZza+Eba0ItuxqY= +go.uber.org/goleak v0.10.0 h1:G3eWbSNIskeRqtsN/1uI5B+eP73y3JUuBsv9AZjehb4= go.uber.org/goleak v0.10.0/go.mod h1:VCZuO8V8mFPlL0F5J5GK1rtHV3DrFcQ1R8ryq7FK0aI= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -1046,6 +1044,7 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1212,6 +1211,7 @@ golang.org/x/tools v0.0.0-20200820010801-b793a1359eac h1:DugppSxw0LSF8lcjaODPJZo golang.org/x/tools v0.0.0-20200820010801-b793a1359eac/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= @@ -1281,6 +1281,7 @@ gopkg.in/alecthomas/kingpin.v3-unstable v3.0.0-20180810215634-df19058c872c/go.mo gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U= gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/cheggaaa/pb.v2 v2.0.7/go.mod h1:0CiZ1p8pvtxBlQpLXkHuUTpdJ1shm3OqCF1QugkjHL4= @@ -1309,6 +1310,7 @@ gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3M gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637/go.mod h1:BHsqpu/nsuzkT5BpiH1EMZPLyqSMM8JbIavyFACoFNk= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= diff --git a/store/mockstore/unistore/cophandler/closure_exec.go b/store/mockstore/unistore/cophandler/closure_exec.go index 5b7bcc338c0ef..58e20d393a63c 100644 --- a/store/mockstore/unistore/cophandler/closure_exec.go +++ b/store/mockstore/unistore/cophandler/closure_exec.go @@ -1267,7 +1267,7 @@ func (e *indexScanProcessor) Finish() error { func (isc *idxScanCtx) checkVal(curVals [][]byte) bool { for i := 0; i < isc.columnLen; i++ { - if bytes.Compare(isc.prevVals[i], curVals[i]) != 0 { + if !bytes.Equal(isc.prevVals[i], curVals[i]) { return false } } diff --git a/store/mockstore/unistore/cophandler/cop_handler.go b/store/mockstore/unistore/cophandler/cop_handler.go index ba81ee4b63c4c..40cb1a8c7203a 100644 --- a/store/mockstore/unistore/cophandler/cop_handler.go +++ b/store/mockstore/unistore/cophandler/cop_handler.go @@ -38,10 +38,8 @@ import ( "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/collate" - "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/rowcodec" "github.com/pingcap/tipb/go-tipb" - "go.uber.org/zap" ) // MPPCtx is the mpp execution context @@ -149,9 +147,6 @@ func handleCopDAGRequest(dbReader *dbreader.DBReader, lockStore *lockstore.MemSt } return nil } - if dagReq.CollectRangeCounts != nil && *dagReq.CollectRangeCounts == true { - logutil.BgLogger().Warn("unistore exec", zap.Int("ndv len", len(closureExec.ndvs))) - } return buildResp(chunks, closureExec, closureExec.ndvs, dagReq, err, dagCtx.sc.GetWarnings(), time.Since(startTime)) } From 48cc1bdf6035a8a35131c403dd5fdcd9ac6ab79c Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Wed, 30 Dec 2020 01:45:47 +0800 Subject: [PATCH 9/9] address comments --- statistics/histogram.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index 519d2096514db..7b71d0116463d 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -211,9 +211,7 @@ func (c *Column) AvgColSizeListInDisk(count int64) float64 { // AppendBucket appends a bucket into `hg`. func (hg *Histogram) AppendBucket(lower *types.Datum, upper *types.Datum, count, repeat int64) { - hg.Buckets = append(hg.Buckets, Bucket{Count: count, Repeat: repeat, NDV: 0}) - hg.Bounds.AppendDatum(0, lower) - hg.Bounds.AppendDatum(0, upper) + hg.AppendBucketWithNDV(lower, upper, count, repeat, 0) } // AppendBucketWithNDV appends a bucket into `hg` and set value for field `NDV`.