Skip to content

Commit

Permalink
opt: add a range operator to fix selectivity estimation of range predicates
Browse files Browse the repository at this point in the history

This commit adds a new scalar operator called Range. Range contains a single
input, which is an And expression that constrains a single variable to a
range. For example, the And expression might be x >= 6 AND x <= 10.

This commit also adds a new normalization rule called ConsolidateSelectFilters,
which consolidates filters that constrain a single variable, and puts them
into a Range operation. For example, filters x >= 6 and x <= 10 would be
consolidated into a single Range operation.

The benefit of consolidating these filters is that it allows a single constraint
to be generated for the variable instead of multiple constraints. In the example above,
we can generate the single constraint [/6 - /10] instead of the two
constraints [/6 - ] and [ - /10]. The single constraint allows us to better
estimate the selectivity of the predicate when calculating statistics for
the Select expression.

For example, suppose that x initially has 1,000,000 distinct values. Once
we apply the predicate x >= 6 AND x <= 10, x has at most 5 distinct values.
Assuming a uniform data distribution, the selectivity of this predicate is
5/1,000,000, or 0.000005. Prior to this commit, each of the two inequalities
was estimated independently with a default selectivity of 1/3, so we were
significantly overestimating the combined selectivity as 1/9, or 0.111111.

Fixes cockroachdb#35947

Release note (performance improvement): Improved the selectivity estimation
of range predicates during query optimization.
  • Loading branch information
rytaft committed Mar 24, 2019
1 parent c59f534 commit e3452e7
Show file tree
Hide file tree
Showing 24 changed files with 2,996 additions and 464 deletions.
3 changes: 3 additions & 0 deletions pkg/sql/opt/exec/execbuilder/scalar_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ func (b *Builder) buildBoolean(ctx *buildScalarCtx, scalar opt.ScalarExpr) (tree
case opt.FiltersItemOp:
return b.buildScalar(ctx, scalar.Child(0).(opt.ScalarExpr))

case opt.RangeOp:
return b.buildScalar(ctx, scalar.Child(0).(opt.ScalarExpr))

default:
panic(pgerror.NewAssertionErrorf("invalid op %s", log.Safe(scalar.Op())))
}
Expand Down
18 changes: 4 additions & 14 deletions pkg/sql/opt/exec/execbuilder/testdata/explain
Original file line number Diff line number Diff line change
Expand Up @@ -480,23 +480,13 @@ values · · (column1 int, column2 int, column3 i
· row 1, expr 1 (5)[int] · ·
· row 1, expr 2 (6)[int] · ·

# TODO(rytaft): range operator needed to detect contradiction
query TTTTT
EXPLAIN (TYPES) SELECT 2*count(k) as z, v FROM t WHERE v>123 GROUP BY v HAVING v<2 AND count(k)>1
----
render · · (z int, v int) ·
│ render 0 ((agg0)[int] * (2)[int])[int] · ·
│ render 1 (v)[int] · ·
└── filter · · (v int, agg0 int) ·
│ filter ((agg0)[int] > (1)[int])[bool] · ·
└── group · · (v int, agg0 int) ·
│ aggregate 0 v · ·
│ aggregate 1 count(k) · ·
│ group by @2 · ·
└── scan · · (k int, v int) ·
· table t@primary · ·
· spans ALL · ·
· filter ((((v)[int] > (123)[int])[bool]) AND (((v)[int] < (2)[int])[bool]))[bool] · ·
render · · (z int, v int) ·
│ render 0 (z)[int] · ·
│ render 1 (v)[int] · ·
└── norows · · (v int, z int) ·

query TTTTT
EXPLAIN (TYPES) DELETE FROM t WHERE v > 1
Expand Down
27 changes: 11 additions & 16 deletions pkg/sql/opt/exec/execbuilder/testdata/join
Original file line number Diff line number Diff line change
Expand Up @@ -606,22 +606,17 @@ SELECT *
WHERE (a IS NULL OR a > 2) AND n > 1 AND (a IS NULL OR a < sq)
]
----
render · ·
│ render 0 a
│ render 1 b
│ render 2 n
│ render 3 sq
└── hash-join · ·
│ type inner
│ equality (sq) = (b)
├── scan · ·
│ table square@primary
│ spans /2-/5/#
│ parallel ·
└── scan · ·
· table pairs@primary
· spans ALL
· filter ((a > 1) AND ((a IS NULL) OR (a > 2))) AND ((a IS NULL) OR (a < b))
hash-join · ·
│ type inner
│ equality (b) = (sq)
├── scan · ·
│ table pairs@primary
│ spans ALL
│ filter ((a > 1) AND ((a IS NULL) OR (a > 2))) AND ((a IS NULL) OR (a < b))
└── scan · ·
· table square@primary
· spans /2-/5/#
· parallel ·


statement ok
Expand Down
8 changes: 7 additions & 1 deletion pkg/sql/opt/idxconstraint/index_constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,9 @@ func (c *indexConstraintCtx) makeSpansForExpr(
if c.colType(offset) == types.Bool && c.isIndexColumn(t.Input, offset) {
return c.makeSpansForSingleColumnDatum(offset, opt.EqOp, tree.DBoolFalse, out)
}

case *memo.RangeExpr:
return c.makeSpansForExpr(offset, t.And, out)
}

if e.ChildCount() < 2 {
Expand Down Expand Up @@ -986,7 +989,7 @@ func (c *indexConstraintCtx) getMaxSimplifyPrefix(idxConstraint *constraint.Cons
func (c *indexConstraintCtx) simplifyFilter(
scalar opt.ScalarExpr, final *constraint.Constraint, maxSimplifyPrefix int,
) opt.ScalarExpr {
// Special handling for And, Or, and Filters.
// Special handling for And, Or, and Range.
switch t := scalar.(type) {
case *memo.AndExpr:
left := c.simplifyFilter(t.Left, final, maxSimplifyPrefix)
Expand All @@ -997,6 +1000,9 @@ func (c *indexConstraintCtx) simplifyFilter(
left := c.simplifyFilter(t.Left, final, maxSimplifyPrefix)
right := c.simplifyFilter(t.Right, final, maxSimplifyPrefix)
return c.factory.ConstructOr(left, right)

case *memo.RangeExpr:
return c.factory.ConstructRange(c.simplifyFilter(t.And, final, maxSimplifyPrefix))
}

// We try to create tight spans for the expression (as allowed by
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/opt/memo/check_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,5 +272,13 @@ func checkFilters(filters FiltersExpr) {
if opt.IsListItemOp(item.Condition) {
panic(pgerror.NewAssertionErrorf("filters list item cannot contain another list item"))
}
if item.Condition.Op() == opt.RangeOp {
if !item.scalar.TightConstraints {
panic(pgerror.NewAssertionErrorf("Range operator should always have tight constraints"))
}
if item.scalar.OuterCols.Len() != 1 {
panic(pgerror.NewAssertionErrorf("Range operator should have exactly one outer col"))
}
}
}
}
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,9 @@ func (cb *constraintsBuilder) buildConstraints(e opt.ScalarExpr) (_ *constraint.
cl = cl.Intersect(cb.evalCtx, cr)
tightl = tightl && tightr
return cl, (tightl || cl == contradiction)

case *RangeExpr:
return cb.buildConstraints(t.And)
}

if e.ChildCount() < 2 {
Expand Down
108 changes: 61 additions & 47 deletions pkg/sql/opt/memo/testdata/logprops/constraints
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ select
│ ├── columns: x:1(int) y:2(int)
│ └── prune: (1,2)
└── filters
├── gt [type=bool, outer=(1), constraints=(/1: [/1 - ]; tight)]
│ ├── variable: x [type=int]
│ └── const: 0 [type=int]
└── lt [type=bool, outer=(1), constraints=(/1: (/NULL - /1]; tight)]
├── variable: x [type=int]
└── const: 2 [type=int]
└── range [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight), fd=()-->(1)]
└── and [type=bool]
├── gt [type=bool]
│ ├── variable: x [type=int]
│ └── const: 0 [type=int]
└── lt [type=bool]
├── variable: x [type=int]
└── const: 2 [type=int]

opt
SELECT * FROM a WHERE x >= 1
Expand Down Expand Up @@ -119,12 +121,14 @@ select
│ ├── columns: x:1(int) y:2(int)
│ └── prune: (1,2)
└── filters
├── gt [type=bool, outer=(1), constraints=(/1: [/2 - ]; tight)]
│ ├── variable: x [type=int]
│ └── const: 1 [type=int]
└── lt [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]
├── variable: x [type=int]
└── const: 5 [type=int]
└── range [type=bool, outer=(1), constraints=(/1: [/2 - /4]; tight)]
└── and [type=bool]
├── gt [type=bool]
│ ├── variable: x [type=int]
│ └── const: 1 [type=int]
└── lt [type=bool]
├── variable: x [type=int]
└── const: 5 [type=int]

opt
SELECT * FROM a WHERE x = 1 AND y = 5
Expand Down Expand Up @@ -152,18 +156,22 @@ select
│ ├── columns: x:1(int) y:2(int)
│ └── prune: (1,2)
└── filters
├── gt [type=bool, outer=(1), constraints=(/1: [/2 - ]; tight)]
│ ├── variable: x [type=int]
│ └── const: 1 [type=int]
├── lt [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]
│ ├── variable: x [type=int]
│ └── const: 5 [type=int]
├── ge [type=bool, outer=(2), constraints=(/2: [/7 - ]; tight)]
│ ├── variable: y [type=int]
│ └── const: 7 [type=int]
└── le [type=bool, outer=(2), constraints=(/2: (/NULL - /9]; tight)]
├── variable: y [type=int]
└── const: 9 [type=int]
├── range [type=bool, outer=(1), constraints=(/1: [/2 - /4]; tight)]
│ └── and [type=bool]
│ ├── gt [type=bool]
│ │ ├── variable: x [type=int]
│ │ └── const: 1 [type=int]
│ └── lt [type=bool]
│ ├── variable: x [type=int]
│ └── const: 5 [type=int]
└── range [type=bool, outer=(2), constraints=(/2: [/7 - /9]; tight)]
└── and [type=bool]
├── ge [type=bool]
│ ├── variable: y [type=int]
│ └── const: 7 [type=int]
└── le [type=bool]
├── variable: y [type=int]
└── const: 9 [type=int]

# Verify the resulting constraints are not tight.
opt
Expand All @@ -175,12 +183,14 @@ select
│ ├── columns: x:1(int) y:2(int)
│ └── prune: (1,2)
└── filters
├── gt [type=bool, outer=(1), constraints=(/1: [/2 - ]; tight)]
│ ├── variable: x [type=int]
│ └── const: 1 [type=int]
├── lt [type=bool, outer=(1), constraints=(/1: (/NULL - /4]; tight)]
│ ├── variable: x [type=int]
│ └── const: 5 [type=int]
├── range [type=bool, outer=(1), constraints=(/1: [/2 - /4]; tight)]
│ └── and [type=bool]
│ ├── gt [type=bool]
│ │ ├── variable: x [type=int]
│ │ └── const: 1 [type=int]
│ └── lt [type=bool]
│ ├── variable: x [type=int]
│ └── const: 5 [type=int]
└── eq [type=bool, outer=(1,2)]
├── plus [type=int]
│ ├── variable: x [type=int]
Expand Down Expand Up @@ -262,12 +272,14 @@ select
│ ├── prune: (1-3)
│ └── interesting orderings: (+1)
└── filters
├── le [type=bool, outer=(3), constraints=(/3: (/NULL - /'foo']; tight)]
│ ├── variable: v [type=string]
│ └── const: 'foo' [type=string]
└── ge [type=bool, outer=(3), constraints=(/3: [/'bar' - ]; tight)]
├── variable: v [type=string]
└── const: 'bar' [type=string]
└── range [type=bool, outer=(3), constraints=(/3: [/'bar' - /'foo']; tight)]
└── and [type=bool]
├── le [type=bool]
│ ├── variable: v [type=string]
│ └── const: 'foo' [type=string]
└── ge [type=bool]
├── variable: v [type=string]
└── const: 'bar' [type=string]

# Test IN.
opt
Expand Down Expand Up @@ -318,17 +330,19 @@ select
│ ├── columns: x:1(int) y:2(int)
│ └── prune: (1,2)
└── filters
├── in [type=bool, outer=(1), constraints=(/1: [/1 - /1] [/3 - /3] [/5 - /5] [/7 - /7] [/9 - /9]; tight)]
│ ├── variable: x [type=int]
│ └── tuple [type=tuple{int, int, int, int, int}]
│ ├── const: 1 [type=int]
│ ├── const: 3 [type=int]
│ ├── const: 5 [type=int]
│ ├── const: 7 [type=int]
│ └── const: 9 [type=int]
└── gt [type=bool, outer=(1), constraints=(/1: [/7 - ]; tight)]
├── variable: x [type=int]
└── const: 6 [type=int]
└── range [type=bool, outer=(1), constraints=(/1: [/7 - /7] [/9 - /9]; tight)]
└── and [type=bool]
├── in [type=bool]
│ ├── variable: x [type=int]
│ └── tuple [type=tuple{int, int, int, int, int}]
│ ├── const: 1 [type=int]
│ ├── const: 3 [type=int]
│ ├── const: 5 [type=int]
│ ├── const: 7 [type=int]
│ └── const: 9 [type=int]
└── gt [type=bool]
├── variable: x [type=int]
└── const: 6 [type=int]

# Test IN in combination with a condition on another column.
opt
Expand Down
18 changes: 5 additions & 13 deletions pkg/sql/opt/memo/testdata/logprops/scan
Original file line number Diff line number Diff line change
Expand Up @@ -143,20 +143,12 @@ TABLE t
├── FAMILY family2 (c)
└── FAMILY family3 (d)

# Test case where one of the explorations causes construction of a scan with a
# contradiction.
# TODO(rytaft): Range operator
opt
SELECT 1 FROM t WHERE a > 1 AND a < 2
----
project
├── columns: "?column?":5(int!null)
values
├── columns: "?column?":5(int)
├── cardinality: [0 - 0]
├── key: ()
├── fd: ()-->(5)
├── prune: (5)
├── scan t@a_desc
│ ├── columns: a:1(int!null)
│ ├── constraint: /-1/2: contradiction
│ ├── prune: (1)
│ └── interesting orderings: (+1) (-1)
└── projections
└── const: 1 [type=int]
└── prune: (5)
7 changes: 3 additions & 4 deletions pkg/sql/opt/memo/testdata/stats/join
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,11 @@ SELECT * FROM xysd JOIN uv ON x=u AND y+v=5 AND y > 0 AND y < 300
----
inner-join
├── columns: x:1(int!null) y:2(int!null) s:3(string) d:4(decimal!null) u:5(int!null) v:6(int!null)
├── stats: [rows=3333.33333, distinct(1)=500, null(1)=0, distinct(2)=308.233808, null(2)=0, distinct(4)=346.00432, null(4)=0, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0]
├── stats: [rows=3333.33333, distinct(1)=500, null(1)=0, distinct(2)=298.995696, null(2)=0, distinct(4)=499.363351, null(4)=0, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0]
├── fd: (1)-->(2-4), (3,4)~~>(1,2), (1)==(5), (5)==(1)
├── select
│ ├── columns: x:1(int!null) y:2(int!null) s:3(string) d:4(decimal!null)
│ ├── stats: [rows=555.555556, distinct(1)=555.555556, null(1)=0, distinct(2)=308.239988, null(2)=0, distinct(4)=346.026926, null(4)=0]
│ ├── stats: [rows=3737.5, distinct(1)=3737.5, null(1)=0, distinct(2)=299, null(2)=0, distinct(4)=499.999473, null(4)=0]
│ ├── key: (1)
│ ├── fd: (1)-->(2-4), (3,4)~~>(1,2)
│ ├── scan xysd
Expand All @@ -406,8 +406,7 @@ inner-join
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2-4), (3,4)~~>(1,2)
│ └── filters
│ ├── y > 0 [type=bool, outer=(2), constraints=(/2: [/1 - ]; tight)]
│ └── y < 300 [type=bool, outer=(2), constraints=(/2: (/NULL - /299]; tight)]
│ └── (y > 0) AND (y < 300) [type=bool, outer=(2), constraints=(/2: [/1 - /299]; tight)]
├── scan uv
│ ├── columns: u:5(int) v:6(int!null)
│ └── stats: [rows=10000, distinct(5)=500, null(5)=0, distinct(6)=100, null(6)=0]
Expand Down
17 changes: 7 additions & 10 deletions pkg/sql/opt/memo/testdata/stats/ordinality
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ SELECT * FROM (SELECT * FROM a WITH ORDINALITY) WHERE ordinality > 0 AND ordinal
----
select
├── columns: x:1(int!null) y:2(int) ordinality:3(int!null)
├── stats: [rows=444.444444, distinct(1)=444.444444, null(1)=0, distinct(3)=444.444444, null(3)=0]
├── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(3)=10, null(3)=0]
├── key: (1)
├── fd: (1)-->(2,3), (3)-->(1,2)
├── row-number
Expand All @@ -43,15 +43,14 @@ select
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── filters
├── ordinality > 0 [type=bool, outer=(3), constraints=(/3: [/1 - ]; tight)]
└── ordinality <= 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /10]; tight)]
└── (ordinality > 0) AND (ordinality <= 10) [type=bool, outer=(3), constraints=(/3: [/1 - /10]; tight)]

norm
SELECT * FROM (SELECT * FROM a WITH ORDINALITY) WHERE y > 0 AND y <= 10
----
select
├── columns: x:1(int!null) y:2(int!null) ordinality:3(int!null)
├── stats: [rows=444.444444, distinct(1)=444.444444, null(1)=0, distinct(2)=276.821541, null(2)=0, distinct(3)=444.444444, null(3)=0]
├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=100, null(3)=0]
├── key: (1)
├── fd: (1)-->(2,3), (3)-->(1,2)
├── row-number
Expand All @@ -65,8 +64,7 @@ select
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── filters
├── y > 0 [type=bool, outer=(2), constraints=(/2: [/1 - ]; tight)]
└── y <= 10 [type=bool, outer=(2), constraints=(/2: (/NULL - /10]; tight)]
└── (y > 0) AND (y <= 10) [type=bool, outer=(2), constraints=(/2: [/1 - /10]; tight)]

norm
SELECT 1 x FROM a WITH ORDINALITY
Expand All @@ -89,11 +87,11 @@ SELECT x FROM (SELECT * FROM a WITH ORDINALITY) WHERE ordinality > 0 AND ordinal
----
project
├── columns: x:1(int!null)
├── stats: [rows=444.444444]
├── stats: [rows=10]
├── key: (1)
└── select
├── columns: x:1(int!null) ordinality:3(int!null)
├── stats: [rows=444.444444, distinct(1)=444.444444, null(1)=0, distinct(3)=444.444444, null(3)=0]
├── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(3)=10, null(3)=0]
├── key: (1)
├── fd: (1)-->(3), (3)-->(1)
├── row-number
Expand All @@ -106,8 +104,7 @@ project
│ ├── stats: [rows=4000, distinct(1)=5000, null(1)=0]
│ └── key: (1)
└── filters
├── ordinality > 0 [type=bool, outer=(3), constraints=(/3: [/1 - ]; tight)]
└── ordinality <= 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /10]; tight)]
└── (ordinality > 0) AND (ordinality <= 10) [type=bool, outer=(3), constraints=(/3: [/1 - /10]; tight)]


norm
Expand Down
Loading

0 comments on commit e3452e7

Please sign in to comment.