From 66304c1930ce5232740a307d0281327b43193595 Mon Sep 17 00:00:00 2001 From: Kenan Yao Date: Wed, 12 Jun 2019 12:50:14 +0800 Subject: [PATCH] planner: fix wrong selectivity for inner selection in index join (#10633) --- cmd/explaintest/r/explain_complex.result | 4 +-- .../r/explain_complex_stats.result | 12 ++++---- cmd/explaintest/r/explain_easy.result | 6 ++-- cmd/explaintest/r/generated_columns.result | 12 ++++---- cmd/explaintest/r/index_join.result | 12 ++++---- cmd/explaintest/r/topn_push_down.result | 8 +++--- cmd/explaintest/r/tpch.result | 16 +++++------ executor/index_lookup_join_test.go | 20 ++++++------- planner/core/cbo_test.go | 4 +-- planner/core/exhaust_physical_plans.go | 19 +++++++++++-- planner/core/find_best_task.go | 15 +++++----- planner/core/logical_plans.go | 7 +++-- planner/core/stats.go | 28 ++++++++----------- 13 files changed, 87 insertions(+), 76 deletions(-) diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index 64da664d9adb6..e659323792973 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -156,9 +156,9 @@ Projection_10 0.00 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test.d ├─TableReader_41 0.00 root data:Selection_40 │ └─Selection_40 0.00 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592), not(isnull(test.dt.dic)) │ └─TableScan_39 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo - └─IndexLookUp_18 3.33 root + └─IndexLookUp_18 0.00 root ├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [eq(test.rr.aid, test.dt.aid) eq(test.rr.dic, test.dt.dic)], keep order:false, stats:pseudo - └─Selection_17 3.33 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592) + └─Selection_17 0.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592) └─TableScan_16 10.00 cop table:rr, keep order:false, stats:pseudo explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr; id count task operator info diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 32e2eb653c670..4e08eb2d4eddf 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -132,9 +132,9 @@ Projection_13 424.00 root test.gad.id, test.dd.id, test.gad.aid, test.gad.cm, te ├─TableReader_29 424.00 root data:Selection_28 │ └─Selection_28 424.00 cop eq(test.gad.bm, 0), eq(test.gad.pt, "android"), gt(test.gad.t, 1478143908), not(isnull(test.gad.ip)) │ └─TableScan_27 1999.00 cop table:gad, range:[0,+inf], keep order:false - └─IndexLookUp_23 455.80 root + └─IndexLookUp_23 0.23 root ├─IndexScan_20 1.00 cop table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, test.gad.aid)], keep order:false - └─Selection_22 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) + └─Selection_22 0.23 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) └─TableScan_21 1.00 cop table:dd, keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; id count task operator info @@ -144,9 +144,9 @@ Projection_10 170.34 root test.gad.id, test.sdk.id, test.gad.aid, test.gad.cm, t ├─TableReader_23 170.34 root data:Selection_22 │ └─Selection_22 170.34 cop eq(test.gad.bm, 0), eq(test.gad.dit, "mac"), eq(test.gad.pt, "ios"), gt(test.gad.t, 1477971479), not(isnull(test.gad.dic)) │ └─TableScan_21 1999.00 cop table:gad, range:[0,+inf], keep order:false - └─IndexLookUp_17 509.04 root + └─IndexLookUp_17 0.25 root ├─IndexScan_14 1.00 cop table:sdk, index:aid, dic, range: decided by [eq(test.sdk.aid, test.gad.aid)], keep order:false - └─Selection_16 509.04 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479), not(isnull(test.sdk.mac)), not(isnull(test.sdk.t)) + └─Selection_16 0.25 cop eq(test.sdk.bm, 0), eq(test.sdk.pt, "ios"), gt(test.sdk.t, 1477971479), not(isnull(test.sdk.mac)), not(isnull(test.sdk.t)) └─TableScan_15 1.00 cop table:dd, keep order:false explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; id count task operator info @@ -164,9 +164,9 @@ Projection_10 428.32 root test.dt.id, test.dt.aid, test.dt.pt, test.dt.dic, test ├─TableReader_41 428.32 root data:Selection_40 │ └─Selection_40 428.32 cop eq(test.dt.bm, 0), eq(test.dt.pt, "ios"), gt(test.dt.t, 1478185592), not(isnull(test.dt.dic)) │ └─TableScan_39 2000.00 cop table:dt, range:[0,+inf], keep order:false - └─IndexLookUp_18 970.00 root + └─IndexLookUp_18 0.48 root ├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [eq(test.rr.aid, test.dt.aid) eq(test.rr.dic, test.dt.dic)], keep order:false - └─Selection_17 970.00 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592) + └─Selection_17 0.48 cop eq(test.rr.pt, "ios"), gt(test.rr.t, 1478185592) └─TableScan_16 1.00 cop table:rr, keep order:false explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr; id count task operator info diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 4806f6be3d3b7..6583b65c47156 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -45,10 +45,10 @@ id count task operator info IndexJoin_12 4166.67 root left outer join, inner:IndexLookUp_11, outer key:test.t1.c2, inner key:test.t2.c1 ├─TableReader_24 3333.33 root data:TableScan_23 │ └─TableScan_23 3333.33 cop table:t1, range:(1,+inf], keep order:false, stats:pseudo -└─IndexLookUp_11 0.00 root - ├─Selection_10 0.00 cop not(isnull(test.t2.c1)) +└─IndexLookUp_11 9.99 root + ├─Selection_10 9.99 cop not(isnull(test.t2.c1)) │ └─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [eq(test.t2.c1, test.t1.c2)], keep order:false, stats:pseudo - └─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo + └─TableScan_9 9.99 cop table:t2, keep order:false, stats:pseudo explain update t1 set t1.c2 = 2 where t1.c1 = 1; id count task operator info Point_Get_1 1.00 root table:t1, handle:1 diff --git a/cmd/explaintest/r/generated_columns.result b/cmd/explaintest/r/generated_columns.result index 5c10b13bbf593..0add5d3921876 100644 --- a/cmd/explaintest/r/generated_columns.result +++ b/cmd/explaintest/r/generated_columns.result @@ -72,10 +72,10 @@ ANALYZE TABLE sgc1, sgc2; EXPLAIN SELECT /*+ TIDB_INLJ(sgc1, sgc2) */ * from sgc1 join sgc2 on sgc1.a=sgc2.a; id count task operator info IndexJoin_17 5.00 root inner join, inner:IndexLookUp_16, outer key:test.sgc2.a, inner key:test.sgc1.a -├─IndexLookUp_16 0.00 root -│ ├─Selection_15 0.00 cop not(isnull(test.sgc1.a)) +├─IndexLookUp_16 5.00 root +│ ├─Selection_15 5.00 cop not(isnull(test.sgc1.a)) │ │ └─IndexScan_13 5.00 cop table:sgc1, index:a, range: decided by [eq(test.sgc1.a, test.sgc2.a)], keep order:false -│ └─TableScan_14 0.00 cop table:sgc1, keep order:false, stats:pseudo +│ └─TableScan_14 5.00 cop table:sgc1, keep order:false, stats:pseudo └─TableReader_20 1.00 root data:Selection_19 └─Selection_19 1.00 cop not(isnull(test.sgc2.a)) └─TableScan_18 1.00 cop table:sgc2, range:[-inf,+inf], keep order:false @@ -86,10 +86,10 @@ Projection_6 5.00 root test.sgc1.j1, test.sgc1.j2, test.sgc1.a, test.sgc1.b, tes ├─TableReader_39 1.00 root data:Selection_38 │ └─Selection_38 1.00 cop not(isnull(test.sgc2.a)) │ └─TableScan_37 1.00 cop table:sgc2, range:[-inf,+inf], keep order:false - └─IndexLookUp_12 0.00 root - ├─Selection_11 0.00 cop not(isnull(test.sgc1.a)) + └─IndexLookUp_12 5.00 root + ├─Selection_11 5.00 cop not(isnull(test.sgc1.a)) │ └─IndexScan_9 5.00 cop table:sgc1, index:a, range: decided by [eq(test.sgc1.a, test.sgc2.a)], keep order:false - └─TableScan_10 0.00 cop table:sgc1, keep order:false, stats:pseudo + └─TableScan_10 5.00 cop table:sgc1, keep order:false, stats:pseudo DROP TABLE IF EXISTS sgc3; CREATE TABLE sgc3 ( j JSON, diff --git a/cmd/explaintest/r/index_join.result b/cmd/explaintest/r/index_join.result index 6d5555bc8993e..b8cac2cbdafba 100644 --- a/cmd/explaintest/r/index_join.result +++ b/cmd/explaintest/r/index_join.result @@ -7,10 +7,10 @@ analyze table t1, t2; explain select /*+ TIDB_INLJ(t1, t2) */ * from t1 join t2 on t1.a=t2.a; id count task operator info IndexJoin_16 5.00 root inner join, inner:IndexLookUp_15, outer key:test.t2.a, inner key:test.t1.a -├─IndexLookUp_15 0.00 root -│ ├─Selection_14 0.00 cop not(isnull(test.t1.a)) +├─IndexLookUp_15 5.00 root +│ ├─Selection_14 5.00 cop not(isnull(test.t1.a)) │ │ └─IndexScan_12 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false -│ └─TableScan_13 0.00 cop table:t1, keep order:false, stats:pseudo +│ └─TableScan_13 5.00 cop table:t1, keep order:false, stats:pseudo └─TableReader_19 1.00 root data:Selection_18 └─Selection_18 1.00 cop not(isnull(test.t2.a)) └─TableScan_17 1.00 cop table:t2, range:[-inf,+inf], keep order:false @@ -21,7 +21,7 @@ Projection_6 5.00 root test.t1.a, test.t1.b, test.t2.a, test.t2.b ├─TableReader_30 1.00 root data:Selection_29 │ └─Selection_29 1.00 cop not(isnull(test.t2.a)) │ └─TableScan_28 1.00 cop table:t2, range:[-inf,+inf], keep order:false - └─IndexLookUp_11 0.00 root - ├─Selection_10 0.00 cop not(isnull(test.t1.a)) + └─IndexLookUp_11 5.00 root + ├─Selection_10 5.00 cop not(isnull(test.t1.a)) │ └─IndexScan_8 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false - └─TableScan_9 0.00 cop table:t1, keep order:false, stats:pseudo + └─TableScan_9 5.00 cop table:t1, keep order:false, stats:pseudo diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result index b6080a8720f82..4d40a3b3b8caf 100644 --- a/cmd/explaintest/r/topn_push_down.result +++ b/cmd/explaintest/r/topn_push_down.result @@ -177,12 +177,12 @@ Projection_13 0.00 root test.te.expect_time │ │ │ └─IndexScan_70 10.00 cop table:tr, index:shop_identy, trade_status, business_type, trade_pay_status, trade_type, delivery_type, source, biz_date, range:[810094178,810094178], keep order:false, stats:pseudo │ │ └─Selection_73 0.00 cop eq(test.tr.brand_identy, 32314), eq(test.tr.domain_type, 2) │ │ └─TableScan_71 0.00 cop table:tr, keep order:false, stats:pseudo - │ └─IndexLookUp_35 250.00 root + │ └─IndexLookUp_35 0.25 root │ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [eq(test.te.trade_id, test.tr.id)], keep order:false, stats:pseudo - │ └─Selection_34 250.00 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000) + │ └─Selection_34 0.25 cop ge(test.te.expect_time, 2018-04-23 00:00:00.000000), le(test.te.expect_time, 2018-04-23 23:59:59.000000) │ └─TableScan_33 10.00 cop table:te, keep order:false, stats:pseudo - └─IndexReader_91 0.00 root index:Selection_90 - └─Selection_90 0.00 cop not(isnull(test.p.relate_id)) + └─IndexReader_91 9.99 root index:Selection_90 + └─Selection_90 9.99 cop not(isnull(test.p.relate_id)) └─IndexScan_89 10.00 cop table:p, index:relate_id, range: decided by [eq(test.p.relate_id, test.tr.id)], keep order:false, stats:pseudo desc select 1 as a from dual order by a limit 1; id count task operator info diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 21ce363ba67ac..48daaa9b7d8b9 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -260,9 +260,9 @@ Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd │ └─TableReader_52 36870000.00 root data:Selection_51 │ └─Selection_51 36870000.00 cop lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000) │ └─TableScan_50 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - └─IndexLookUp_28 162945114.27 root + └─IndexLookUp_28 0.54 root ├─IndexScan_25 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false - └─Selection_27 162945114.27 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) + └─Selection_27 0.54 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) └─TableScan_26 1.00 cop table:lineitem, keep order:false /* Q4 Order Priority Checking Query @@ -301,9 +301,9 @@ Sort_10 1.00 root tpch.orders.o_orderpriority:asc ├─TableReader_33 2925937.50 root data:Selection_32 │ └─Selection_32 2925937.50 cop ge(tpch.orders.o_orderdate, 1995-01-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1995-04-01) │ └─TableScan_31 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - └─IndexLookUp_20 240004648.80 root + └─IndexLookUp_20 0.80 root ├─IndexScan_17 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false - └─Selection_19 240004648.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate) + └─Selection_19 0.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate) └─TableScan_18 1.00 cop table:lineitem, keep order:false /* Q5 Local Supplier Volume Query @@ -672,9 +672,9 @@ Projection_17 20.00 root tpch.customer.c_custkey, tpch.customer.c_name, 9_col_0, │ └─TableReader_48 3017307.69 root data:Selection_47 │ └─Selection_47 3017307.69 cop ge(tpch.orders.o_orderdate, 1993-08-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1993-11-01) │ └─TableScan_46 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false - └─IndexLookUp_31 73916005.00 root + └─IndexLookUp_31 0.25 root ├─IndexScan_28 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false - └─Selection_30 73916005.00 cop eq(tpch.lineitem.l_returnflag, "R") + └─Selection_30 0.25 cop eq(tpch.lineitem.l_returnflag, "R") └─TableScan_29 1.00 cop table:lineitem, keep order:false /* Q11 Important Stock Identification Query @@ -1241,9 +1241,9 @@ Projection_25 1.00 root tpch.supplier.s_name, 17_col_0 │ └─IndexLookUp_55 1.00 root │ ├─IndexScan_53 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.l2.l_orderkey, tpch.l1.l_orderkey)], keep order:false │ └─TableScan_54 1.00 cop table:lineitem, keep order:false - └─IndexLookUp_39 240004648.80 root + └─IndexLookUp_39 0.80 root ├─IndexScan_36 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.l3.l_orderkey, tpch.l1.l_orderkey)], keep order:false - └─Selection_38 240004648.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) + └─Selection_38 0.80 cop gt(tpch.l3.l_receiptdate, tpch.l3.l_commitdate) └─TableScan_37 1.00 cop table:lineitem, keep order:false /* Q22 Global Sales Opportunity Query diff --git a/executor/index_lookup_join_test.go b/executor/index_lookup_join_test.go index 60ebb5e1e811b..9e0863bab46c6 100644 --- a/executor/index_lookup_join_test.go +++ b/executor/index_lookup_join_test.go @@ -67,11 +67,11 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { "│ └─TableReader_17 9990.00 root data:Selection_16", "│ └─Selection_16 9990.00 cop not(isnull(test.t1.a))", "│ └─TableScan_15 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - "└─UnionScan_12 0.00 root not(isnull(test.t2.a))", - " └─IndexLookUp_11 0.00 root ", - " ├─Selection_10 0.00 cop not(isnull(test.t2.a))", + "└─UnionScan_12 9.99 root not(isnull(test.t2.a))", + " └─IndexLookUp_11 9.99 root ", + " ├─Selection_10 9.99 cop not(isnull(test.t2.a))", " │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", - " └─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo", + " └─TableScan_9 9.99 cop table:t2, keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( "2 2 2 2 2", @@ -85,9 +85,9 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { " │ └─TableReader_16 9990.00 root data:Selection_15", " │ └─Selection_15 9990.00 cop not(isnull(test.t1.a))", " │ └─TableScan_14 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - " └─UnionScan_11 0.00 root not(isnull(test.t2.a))", - " └─IndexReader_10 0.00 root index:Selection_9", - " └─Selection_9 0.00 cop not(isnull(test.t2.a))", + " └─UnionScan_11 9.99 root not(isnull(test.t2.a))", + " └─IndexReader_10 9.99 root index:Selection_9", + " └─Selection_9 9.99 cop not(isnull(test.t2.a))", " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( @@ -114,9 +114,9 @@ func (s *testSuite1) TestBatchIndexJoinUnionScan(c *C) { " │ └─TableReader_22 9990.00 root data:Selection_21", " │ └─Selection_21 9990.00 cop not(isnull(test.t1.a))", " │ └─TableScan_20 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - " └─UnionScan_26 0.00 root not(isnull(test.t2.a))", - " └─IndexReader_25 0.00 root index:Selection_24", - " └─Selection_24 0.00 cop not(isnull(test.t2.a))", + " └─UnionScan_26 9.99 root not(isnull(test.t2.a))", + " └─IndexReader_25 9.99 root index:Selection_24", + " └─Selection_24 9.99 cop not(isnull(test.t2.a))", " └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index f3034acf1136f..4c323c39c785d 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -906,8 +906,8 @@ func (s *testAnalyzeSuite) TestIssue9562(c *C) { "├─TableReader_12 9980.01 root data:Selection_11", "│ └─Selection_11 9980.01 cop not(isnull(test.t1.a)), not(isnull(test.t1.c))", "│ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", - "└─IndexReader_8 0.00 root index:Selection_7", - " └─Selection_7 0.00 cop not(isnull(test.t2.a)), not(isnull(test.t2.c))", + "└─IndexReader_8 9.98 root index:Selection_7", + " └─Selection_7 9.98 cop not(isnull(test.t2.a)), not(isnull(test.t2.c))", " └─IndexScan_6 10.00 cop table:t2, index:a, b, c, range: decided by [eq(test.t2.a, test.t1.a) gt(test.t2.b, minus(test.t1.b, 1)) lt(test.t2.b, plus(test.t1.b, 1))], keep order:false, stats:pseudo", )) diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index e1eecf0766540..cb80204df7b4a 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -588,8 +588,23 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn is.initSchema(ds.id, idx, cop.tablePlan != nil) indexConds, tblConds := splitIndexFilterConditions(filterConds, idx.Columns, ds.tableInfo) - path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} - is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) + path := &accessPath{ + indexFilters: indexConds, + tableFilters: tblConds, + countAfterAccess: rowCount, + } + // Assume equal conditions used by index join and other conditions are independent. + if len(indexConds) > 0 { + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds) + if err != nil { + logutil.Logger(context.Background()).Debug("calculate selectivity failed, use selection factor", zap.Error(err)) + selectivity = selectionFactor + } + path.countAfterIndex = rowCount * selectivity + } + selectivity := ds.stats.RowCount / ds.tableStats.RowCount + finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount) + is.addPushedDownSelection(cop, ds, path, finalStats) t := finishCopTask(ds.ctx, cop) reader := t.plan() return p.constructInnerUnionScan(us, reader) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index b71546145175b..6fdae516786da 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -528,7 +528,8 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid } // prop.IsEmpty() would always return true when coming to here, // so we can just use prop.ExpectedCnt as parameter of addPushedDownSelection. - is.addPushedDownSelection(cop, ds, prop.ExpectedCnt, path) + finalStats := ds.stats.ScaleByExpectCnt(prop.ExpectedCnt) + is.addPushedDownSelection(cop, ds, path, finalStats) if prop.TaskTp == property.RootTaskType { task = finishCopTask(ds.ctx, task) } else if _, ok := task.(*rootTask); ok { @@ -569,16 +570,16 @@ func (is *PhysicalIndexScan) initSchema(id int, idx *model.IndexInfo, isDoubleRe is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, expectedCnt float64, path *accessPath) { +func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, path *accessPath, finalStats *property.StatsInfo) { // Add filter condition to table plan now. indexConds, tableConds := path.indexFilters, path.tableFilters if indexConds != nil { copTask.cst += copTask.count() * cpuFactor - count := path.countAfterAccess - if count >= 1.0 { - selectivity := path.countAfterIndex / path.countAfterAccess - count = is.stats.RowCount * selectivity + var selectivity float64 + if path.countAfterAccess > 0 { + selectivity = path.countAfterIndex / path.countAfterAccess } + count := is.stats.RowCount * selectivity stats := &property.StatsInfo{RowCount: count} indexSel := PhysicalSelection{Conditions: indexConds}.Init(is.ctx, stats) indexSel.SetChildren(is) @@ -587,7 +588,7 @@ func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSou if tableConds != nil { copTask.finishIndexPlan() copTask.cst += copTask.count() * cpuFactor - tableSel := PhysicalSelection{Conditions: tableConds}.Init(is.ctx, p.stats.ScaleByExpectCnt(expectedCnt)) + tableSel := PhysicalSelection{Conditions: tableConds}.Init(is.ctx, finalStats) tableSel.SetChildren(copTask.tablePlan) copTask.tablePlan = tableSel } diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index d6c2bfadbf922..8ff9f45cf3912 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -337,6 +337,7 @@ type DataSource struct { allConds []expression.Expression statisticTable *statistics.Table + tableStats *property.StatsInfo // possibleAccessPaths stores all the possible access path for physical plan, including table scan. possibleAccessPaths []*accessPath @@ -470,7 +471,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) { path.tableFilters = res.RemainedConds path.eqCondCount = res.EqCondCount eqOrInCount = res.EqOrInCount - path.countAfterAccess, err = ds.stats.HistColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) + path.countAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) if err != nil { return false, err } @@ -503,9 +504,9 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) { path.countAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count)) } if path.indexFilters != nil { - selectivity, _, err := ds.stats.HistColl.Selectivity(ds.ctx, path.indexFilters) + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, path.indexFilters) if err != nil { - logutil.Logger(context.Background()).Warn("calculate selectivity faild, use selection factor", zap.Error(err)) + logutil.Logger(context.Background()).Debug("calculate selectivity failed, use selection factor", zap.Error(err)) selectivity = selectionFactor } path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.stats.RowCount) diff --git a/planner/core/stats.go b/planner/core/stats.go index 38b73f3981d2d..c08e6d4262fe9 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -90,31 +90,29 @@ func (ds *DataSource) getColumnNDV(colID int64) (ndv float64) { return ndv } -func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) (*property.StatsInfo, *statistics.HistColl) { - profile := &property.StatsInfo{ +func (ds *DataSource) deriveStatsByFilter(conds expression.CNFExprs) { + tableStats := &property.StatsInfo{ RowCount: float64(ds.statisticTable.Count), Cardinality: make([]float64, len(ds.Columns)), HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns), StatsVersion: ds.statisticTable.Version, } if ds.statisticTable.Pseudo { - profile.StatsVersion = statistics.PseudoVersion + tableStats.StatsVersion = statistics.PseudoVersion } - for i, col := range ds.Columns { - profile.Cardinality[i] = ds.getColumnNDV(col.ID) + tableStats.Cardinality[i] = ds.getColumnNDV(col.ID) } - ds.stats = profile - selectivity, nodes, err := profile.HistColl.Selectivity(ds.ctx, conds) + ds.tableStats = tableStats + selectivity, nodes, err := tableStats.HistColl.Selectivity(ds.ctx, conds) if err != nil { - logutil.Logger(context.Background()).Warn("an error happened, use the default selectivity", zap.Error(err)) + logutil.Logger(context.Background()).Debug("an error happened, use the default selectivity", zap.Error(err)) selectivity = selectionFactor } - if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 && ds.stats.HistColl != nil { - finalHist := ds.stats.HistColl.NewHistCollBySelectivity(ds.ctx.GetSessionVars().StmtCtx, nodes) - return profile, finalHist + ds.stats = tableStats.Scale(selectivity) + if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 { + ds.stats.HistColl = ds.stats.HistColl.NewHistCollBySelectivity(ds.ctx.GetSessionVars().StmtCtx, nodes) } - return profile.Scale(selectivity), nil } // DeriveStats implement LogicalPlan DeriveStats interface. @@ -123,8 +121,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S for i, expr := range ds.pushedDownConds { ds.pushedDownConds[i] = expression.PushDownNot(nil, expr, false) } - var finalHist *statistics.HistColl - ds.stats, finalHist = ds.getStatsByFilter(ds.pushedDownConds) + ds.deriveStatsByFilter(ds.pushedDownConds) for _, path := range ds.possibleAccessPaths { if path.isTablePath { noIntervalRanges, err := ds.deriveTablePathStats(path) @@ -150,9 +147,6 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo) (*property.S break } } - if ds.ctx.GetSessionVars().OptimizerSelectivityLevel >= 1 { - ds.stats.HistColl = finalHist - } return ds.stats, nil }