Commit 284d3855 authored by Matt Jibson

opt: add costs for join filters

Previously, JOIN ON filters were costed very simply or not at all. This
change adds filter costing for all join types and introduces a shared
helper, computeFiltersCost, where that costing can be extended.

This is a prerequisite for #48214.

Add a memo test in join for geolookup so that future cost changes will
be clearly displayed.

Release notes: None
parent 53607a3a
......@@ -21,7 +21,7 @@ SELECT s FROM a INNER JOIN xy ON a.k=xy.x AND i+1=10
----
================================================================================
Initial expression
Cost: 15503.39
Cost: 15503.40
================================================================================
project
├── columns: s:4
......@@ -41,7 +41,7 @@ Initial expression
└── (k:1 = x:6) AND ((i:2 + 1) = 10) [outer=(1,2,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ])]
================================================================================
NormalizeCmpPlusConst
Cost: 15470.06
Cost: 15470.07
================================================================================
project
├── columns: s:4
......@@ -63,7 +63,7 @@ NormalizeCmpPlusConst
+ └── (k:1 = x:6) AND (i:2 = (10 - 1)) [outer=(1,2,6), constraints=(/1: (/NULL - ]; /2: (/NULL - ]; /6: (/NULL - ])]
================================================================================
FoldBinary
Cost: 12203.39
Cost: 12203.40
================================================================================
project
├── columns: s:4
......@@ -85,7 +85,7 @@ FoldBinary
+ └── (k:1 = x:6) AND (i:2 = 9) [outer=(1,2,6), constraints=(/1: (/NULL - ]; /2: [/9 - /9]; /6: (/NULL - ]), fd=()-->(2)]
================================================================================
SimplifyJoinFilters
Cost: 2180.16
Cost: 2180.17
================================================================================
project
├── columns: s:4
......@@ -1753,7 +1753,7 @@ TryDecorrelateScalarGroupBy
└── case:11 [as=r:8, outer=(11)]
================================================================================
TryDecorrelateProjectSelect
Cost: 2280.14
Cost: 2280.15
================================================================================
project
├── columns: r:8
......@@ -1839,7 +1839,7 @@ TryDecorrelateProjectSelect
└── case:11 [as=r:8, outer=(11)]
================================================================================
DecorrelateJoin
Cost: 2280.14
Cost: 2280.15
================================================================================
project
├── columns: r:8
......
......@@ -21,6 +21,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"golang.org/x/tools/container/intsets"
......@@ -361,8 +362,26 @@ func (c *coster) computeHashJoinCost(join memo.RelExpr) memo.Cost {
}
cost += memo.Cost(rowsProcessed) * cpuCostFactor
// TODO(rytaft): Add a constant "setup" cost per extra ON condition similar
// to merge join and lookup join.
// Compute filter cost. Fetch the equality columns so they can be
// ignored later.
on := join.Child(2).(*memo.FiltersExpr)
leftEq, rightEq := memo.ExtractJoinEqualityColumns(
join.Child(0).(memo.RelExpr).Relational().OutputCols,
join.Child(1).(memo.RelExpr).Relational().OutputCols,
*on,
)
// Generate a quick way to lookup if two columns are join equality
// columns. We add in both directions because we don't know which way
// the equality filters will be defined.
eqMap := util.FastIntMap{}
for i := range leftEq {
left := int(leftEq[i])
right := int(rightEq[i])
eqMap.Set(left, right)
eqMap.Set(right, left)
}
cost += c.computeFiltersCost(*on, eqMap)
return cost
}
......@@ -382,10 +401,7 @@ func (c *coster) computeMergeJoinCost(join *memo.MergeJoinExpr) memo.Cost {
}
cost += memo.Cost(rowsProcessed) * cpuCostFactor
// Add a constant "setup" cost per ON condition to account for the fact that
// the rowsProcessed estimate alone cannot effectively discriminate between
// plans when RowCount is too small.
cost += cpuCostFactor * memo.Cost(len(join.On))
cost += c.computeFiltersCost(join.On, util.FastIntMap{})
return cost
}
......@@ -452,10 +468,7 @@ func (c *coster) computeLookupJoinCost(
}
cost += memo.Cost(rowsProcessed) * perRowCost
// Add a constant "setup" cost per ON condition to account for the fact that
// the rowsProcessed estimate alone cannot effectively discriminate between
// plans when RowCount is too small.
cost += cpuCostFactor * memo.Cost(len(join.On))
cost += c.computeFiltersCost(join.On, util.FastIntMap{})
return cost
}
......@@ -464,6 +477,37 @@ func (c *coster) computeGeoLookupJoinCost(join *memo.GeoLookupJoinExpr) memo.Cos
return 0
}
// computeFiltersCost returns the cost contributed by a join's ON filters.
//
// Each filter adds one cpuCostFactor to the result. The single exception is
// an equality whose two operands are both plain variables and whose columns
// are paired in eqMap (looked up by the left column and compared against the
// right column); such a filter adds nothing. Callers that have no equality
// columns to exclude pass an empty map, in which case every filter is charged.
func (c *coster) computeFiltersCost(filters memo.FiltersExpr, eqMap util.FastIntMap) memo.Cost {
	var total memo.Cost
	for idx := range filters {
		cond := filters[idx].Condition
		if cond.Op() == opt.EqOp {
			eqExpr := cond.(*memo.EqExpr)
			lhs, lhsIsVar := eqExpr.Left.(*memo.VariableExpr)
			rhs, rhsIsVar := eqExpr.Right.(*memo.VariableExpr)
			if lhsIsVar && rhsIsVar {
				partner, found := eqMap.Get(int(lhs.Col))
				if found && partner == int(rhs.Col) {
					// Some joins still carry their equality conditions in
					// the filters while others have already stripped them
					// out. Either way they are free here.
					continue
				}
			}
		}
		// Charge a fixed "setup" cost for every remaining ON condition: when
		// RowCount is very small, the rowsProcessed estimate by itself is not
		// enough to tell competing plans apart.
		total += cpuCostFactor
	}
	return total
}
func (c *coster) computeZigzagJoinCost(join *memo.ZigzagJoinExpr) memo.Cost {
rowCount := join.Relational().Stats.RowCount
......@@ -484,6 +528,8 @@ func (c *coster) computeZigzagJoinCost(join *memo.ZigzagJoinExpr) memo.Cost {
// Double the cost of emitting rows as well as the cost of seeking rows,
// given two indexes will be accessed.
cost := memo.Cost(rowCount) * (2*(cpuCostFactor+seqIOCostFactor) + scanCost)
cost += c.computeFiltersCost(join.On, util.FastIntMap{})
return cost
}
......
......@@ -675,7 +675,7 @@ memo (optimized, ~8KB, required=[presentation: a:1,b:2,c:3,k:5])
├── G1: (inner-join G2 G3 G4) (inner-join G3 G2 G4)
│ └── [presentation: a:1,b:2,c:3,k:5]
│ ├── best: (inner-join G2 G3 G4)
│ └── cost: 12120.05
│ └── cost: 12120.06
├── G2: (scan abc,cols=(1-3)) (scan [email protected],cols=(1-3)) (scan [email protected],cols=(1-3))
│ └── []
│ ├── best: (scan abc,cols=(1-3))
......@@ -1678,6 +1678,73 @@ project
└── projections
└── sum:15 / (st_area(n.geom:14) / 1e+06) [as=popn_per_sqkm:16, outer=(14,15), side-effects]
memo expect=GenerateGeoLookupJoins
SELECT
n.name,
Sum(c.popn_total) / (ST_Area(n.geom) / 1000000.0) AS popn_per_sqkm
FROM nyc_census_blocks AS c
JOIN nyc_neighborhoods AS n
ON ST_Intersects(c.geom, n.geom) AND c.boroname = n.boroname
WHERE n.name = 'Upper West Side'
OR n.name = 'Upper East Side'
GROUP BY n.name, n.geom
----
memo (optimized, ~23KB, required=[presentation: name:13,popn_per_sqkm:16])
├── G1: (project G2 G3 name)
│ └── [presentation: name:13,popn_per_sqkm:16]
│ ├── best: (project G2 G3 name)
│ └── cost: 5102.53
├── G2: (group-by G4 G5 cols=(13,14))
│ └── []
│ ├── best: (group-by G4 G5 cols=(13,14))
│ └── cost: 5102.48
├── G3: (projections G6)
├── G4: (inner-join G7 G8 G9) (inner-join G8 G7 G9) (lookup-join G10 G9 nyc_census_blocks,keyCols=[1],outCols=(3,9,10,12-14))
│ └── []
│ ├── best: (lookup-join G10 G9 nyc_census_blocks,keyCols=[1],outCols=(3,9,10,12-14))
│ └── cost: 4895.53
├── G5: (aggregations G11)
├── G6: (div G12 G13)
├── G7: (scan c,cols=(3,9,10))
│ └── []
│ ├── best: (scan c,cols=(3,9,10))
│ └── cost: 43837.24
├── G8: (select G14 G15)
│ └── []
│ ├── best: (select G14 G15)
│ └── cost: 139.35
├── G9: (filters G16 G17)
├── G10: (geo-lookup-join G8 G18 [email protected]_census_blocks_geo_idx)
│ └── []
│ ├── best: (geo-lookup-join G8 G18 [email protected]_census_blocks_geo_idx)
│ └── cost: 139.36
├── G11: (sum G19)
├── G12: (variable sum)
├── G13: (div G20 G21)
├── G14: (scan n,cols=(12-14))
│ └── []
│ ├── best: (scan n,cols=(12-14))
│ └── cost: 138.05
├── G15: (filters G22)
├── G16: (function G23 st_intersects)
├── G17: (eq G24 G25)
├── G18: (filters)
├── G19: (variable popn_total)
├── G20: (function G26 st_area)
├── G21: (const 1e+06)
├── G22: (or G27 G28)
├── G23: (scalar-list G29 G30)
├── G24: (variable c.boroname)
├── G25: (variable n.boroname)
├── G26: (scalar-list G30)
├── G27: (eq G31 G32)
├── G28: (eq G31 G33)
├── G29: (variable c.geom)
├── G30: (variable n.geom)
├── G31: (variable name)
├── G32: (const 'Upper West Side')
└── G33: (const 'Upper East Side')
# --------------------------------------------------
# GenerateZigZagJoins
# --------------------------------------------------
......@@ -1717,7 +1784,7 @@ memo (optimized, ~13KB, required=[presentation: q:2,r:3])
├── G1: (select G2 G3) (zigzag-join G3 [email protected] [email protected]) (select G4 G5) (select G6 G7) (select G8 G7)
│ └── [presentation: q:2,r:3]
│ ├── best: (zigzag-join G3 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G2: (scan pqr,cols=(2,3))
│ └── []
│ ├── best: (scan pqr,cols=(2,3))
......@@ -1798,7 +1865,7 @@ memo (optimized, ~15KB, required=[presentation: q:2,r:3,s:4])
├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4)) (select G6 G7) (select G8 G9) (select G10 G9)
│ └── [presentation: q:2,r:3,s:4]
│ ├── best: (lookup-join G4 G5 pqr,keyCols=[1],outCols=(2-4))
│ └── cost: 0.84
│ └── cost: 0.86
├── G2: (scan pqr,cols=(2-4))
│ └── []
│ ├── best: (scan pqr,cols=(2-4))
......@@ -1807,7 +1874,7 @@ memo (optimized, ~15KB, required=[presentation: q:2,r:3,s:4])
├── G4: (zigzag-join G3 [email protected] [email protected])
│ └── []
│ ├── best: (zigzag-join G3 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G5: (filters)
├── G6: (index-join G13 pqr,cols=(2-4))
│ └── []
......@@ -1863,7 +1930,7 @@ memo (optimized, ~11KB, required=[presentation: q:2,s:4])
├── G1: (select G2 G3) (zigzag-join G3 [email protected] [email protected]) (select G4 G5) (select G6 G7)
│ └── [presentation: q:2,s:4]
│ ├── best: (zigzag-join G3 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G2: (scan pqr,cols=(2,4))
│ └── []
│ ├── best: (scan pqr,cols=(2,4))
......@@ -1917,7 +1984,7 @@ memo (optimized, ~13KB, required=[presentation: r:3,t:5])
├── G1: (select G2 G3) (zigzag-join G3 [email protected] [email protected]) (select G4 G5) (select G6 G5) (select G7 G8)
│ └── [presentation: r:3,t:5]
│ ├── best: (zigzag-join G3 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G2: (scan pqr,cols=(3,5))
│ └── []
│ ├── best: (scan pqr,cols=(3,5))
......@@ -2008,7 +2075,7 @@ memo (optimized, ~31KB, required=[presentation: p:1,q:2,r:3,s:4])
├── G1: (select G2 G3) (lookup-join G4 G5 pqr,keyCols=[1],outCols=(1-4)) (zigzag-join G3 [email protected] [email protected]) (zigzag-join G3 [email protected] [email protected]) (lookup-join G6 G7 pqr,keyCols=[1],outCols=(1-4)) (select G8 G9) (select G10 G11) (select G12 G7) (select G13 G7)
│ └── [presentation: p:1,q:2,r:3,s:4]
│ ├── best: (zigzag-join G3 [email protected] [email protected])
│ └── cost: 0.01
│ └── cost: 0.04
├── G2: (scan pqr,cols=(1-4))
│ └── []
│ ├── best: (scan pqr,cols=(1-4))
......@@ -2017,12 +2084,12 @@ memo (optimized, ~31KB, required=[presentation: p:1,q:2,r:3,s:4])
├── G4: (zigzag-join G17 [email protected] [email protected])
│ └── []
│ ├── best: (zigzag-join G17 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G5: (filters G16)
├── G6: (zigzag-join G9 [email protected] [email protected])
│ └── []
│ ├── best: (zigzag-join G9 [email protected] [email protected])
│ └── cost: 0.22
│ └── cost: 0.24
├── G7: (filters G14)
├── G8: (index-join G18 pqr,cols=(1-4))
│ └── []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment