Commit ef4eec09 authored by craig[bot]

Merge #49802

49802: opt: modify limit pushdown rule to support inner joins r=DrewKimball a=DrewKimball

Previously, the optimizer could not push a limit into an InnerJoin.
This patch replaces PushLimitIntoLeftJoin with two rules which
perform the same function as well as handle the InnerJoin case.
A limit can be pushed into a given side of an InnerJoin when rows
from that side are guaranteed to be preserved by the join.

Release note (sql change): improve performance for queries with a
limit on a join that is guaranteed to preserve input rows.
Co-authored-by: Drew Kimball <[email protected]>
parents 1e836e6d ada13197
......@@ -242,8 +242,8 @@ func filtersMatchAllLeftRows(left, right RelExpr, filters FiltersExpr) bool {
// input, so every left row is guaranteed to match at least once.
return true
}
// Case 1b: if there is at least one not-null foreign key column referencing
// the unfiltered right columns, return true. Otherwise, false.
// Case 1b: if there is at least one not-null foreign key referencing the
// unfiltered right columns, return true. Otherwise, false.
return makeForeignKeyMap(
md, left.Relational().NotNullCols, deriveUnfilteredCols(right)) != nil
}
......
......@@ -123,14 +123,39 @@ $input
$private
)
# PushLimitIntoLeftJoin pushes a Limit into the left input of a left join. Since
# the left join creates an output row for each left input row, we only need that
# many rows from that input. We can only do this if the limit ordering refers
# only to the left input columns. We also check that the cardinality of the left
# input is more than the limit, to prevent repeated applications of the rule.
[PushLimitIntoLeftJoin, Normalize]
# PushLimitIntoJoinLeft pushes a Limit into the left input of an InnerJoin or
# LeftJoin when rows from the left input are guaranteed to be preserved by the
# join. Since the join creates an output row for each left input row, we only
# need that many rows from that input. We can only do this if the limit ordering
# refers only to the left input columns. We also check that the cardinality of
# the left input is more than the limit, to prevent repeated applications of the
# rule.
#
# Why can we only match InnerJoins and LeftJoins (e.g. not FullJoins)?
#
# CREATE TABLE t_x (x INT PRIMARY KEY)
# CREATE TABLE t_r (r INT NOT NULL REFERENCES t_x(x))
#
# SELECT * FROM t_r FULL JOIN t_x ON r = x LIMIT 10
# vs
# SELECT * FROM (SELECT * FROM t_r LIMIT 10) FULL JOIN t_x ON r = x LIMIT 10
#
# In the first query, all rows from t_r (left rows) would have a chance to match
# with a row from t_x. In the second query, left rows that otherwise would have
# matched may be filtered out by the limit. Rows from t_x would then no longer
# have matches, and would be outputted by the FullJoin with the left side
# (t_r columns) null-extended. Therefore, pushing the limit into a join input
# that may be null-extended (either input of a FullJoin) can lead to output rows
# being replaced with null values.
[PushLimitIntoJoinLeft, Normalize]
(Limit
$input:(LeftJoin $left:* $right:* $on:* $private:*)
$input:(InnerJoin | LeftJoin
$left:*
$right:*
$on:*
$private:*
) &
(JoinPreservesLeftRows $input)
$limitExpr:(Const $limit:*) &
(IsPositiveInt $limit) &
^(LimitGeMaxRows $limit $left)
......@@ -142,7 +167,7 @@ $input
)
=>
(Limit
(LeftJoin
((OpName $input)
(Limit $left $limitExpr (PruneOrdering $ordering $cols))
$right
$on
......@@ -151,3 +176,29 @@ $input
$limitExpr
$ordering
)
# PushLimitIntoJoinRight mirrors PushLimitIntoJoinLeft: it pushes a Limit into
# the right input of an InnerJoin when rows from the right input are guaranteed
# to be preserved by the join (JoinPreservesRightRows). The original Limit is
# kept above the join; a copy of it is additionally placed on the right input.
#
# The rule only fires when all of the following hold:
#   - The limit is a constant that passes IsPositiveInt.
#   - LimitGeMaxRows does not hold for the limit and the right input. As in
#     PushLimitIntoJoinLeft, this keeps the rule from being applied repeatedly
#     once the right input is already limited.
#   - The limit ordering can be projected onto the output columns of the right
#     input (OrderingCanProjectCols). The pushed-down Limit uses that ordering
#     pruned to those columns (PruneOrdering).
#
# Unlike PushLimitIntoJoinLeft, only InnerJoin is matched. The right input of a
# LeftJoin may be null-extended, so limiting it could replace output values
# with nulls (see the FullJoin discussion on PushLimitIntoJoinLeft).
[PushLimitIntoJoinRight, Normalize]
(Limit
$input:(InnerJoin $left:* $right:* $on:* $private:*) &
(JoinPreservesRightRows $input)
$limitExpr:(Const $limit:*) &
(IsPositiveInt $limit) &
^(LimitGeMaxRows $limit $right)
$ordering:* &
(OrderingCanProjectCols
$ordering
$cols:(OutputCols $right)
)
)
=>
(Limit
((OpName $input)
$left
(Limit $right $limitExpr (PruneOrdering $ordering $cols))
$on
$private
)
$limitExpr
$ordering
)
......@@ -14,6 +14,10 @@ exec-ddl
CREATE TABLE uv (u INT PRIMARY KEY, v INT)
----
exec-ddl
CREATE TABLE kvr_fk(k INT PRIMARY KEY, v INT, r INT NOT NULL REFERENCES uv(u))
----
# --------------------------------------------------
# EliminateLimit
# --------------------------------------------------
......@@ -720,11 +724,46 @@ limit
│ └── limit hint: 10.00
└── 10
# ---------------------
# PushLimitIntoLeftJoin
# ---------------------
# ------------------------------------------------
# PushLimitIntoJoinLeft and PushLimitIntoJoinRight
# ------------------------------------------------
norm expect=PushLimitIntoLeftJoin
# InnerJoin case.
norm expect=PushLimitIntoJoinLeft
SELECT * FROM kvr_fk INNER JOIN uv ON r = u LIMIT 10
----
limit
├── columns: k:1!null v:2 r:3!null u:4!null v:5
├── cardinality: [0 - 10]
├── key: (1)
├── fd: (1)-->(2,3), (4)-->(5), (3)==(4), (4)==(3)
├── inner-join (hash)
│ ├── columns: k:1!null kvr_fk.v:2 r:3!null u:4!null uv.v:5
│ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
│ ├── key: (1)
│ ├── fd: (1)-->(2,3), (4)-->(5), (3)==(4), (4)==(3)
│ ├── limit hint: 10.00
│ ├── limit
│ │ ├── columns: k:1!null kvr_fk.v:2 r:3!null
│ │ ├── cardinality: [0 - 10]
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2,3)
│ │ ├── scan kvr_fk
│ │ │ ├── columns: k:1!null kvr_fk.v:2 r:3!null
│ │ │ ├── key: (1)
│ │ │ ├── fd: (1)-->(2,3)
│ │ │ └── limit hint: 10.00
│ │ └── 10
│ ├── scan uv
│ │ ├── columns: u:4!null uv.v:5
│ │ ├── key: (4)
│ │ └── fd: (4)-->(5)
│ └── filters
│ └── r:3 = u:4 [outer=(3,4), constraints=(/3: (/NULL - ]; /4: (/NULL - ]), fd=(3)==(4), (4)==(3)]
└── 10
# LeftJoin case.
norm expect=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u LIMIT 10
----
limit
......@@ -757,8 +796,42 @@ limit
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── 10
# InnerJoin case for PushLimitIntoJoinRight.
norm expect=PushLimitIntoJoinRight
SELECT * FROM uv INNER JOIN kvr_fk ON u = r LIMIT 10
----
limit
├── columns: u:1!null v:2 k:3!null v:4 r:5!null
├── cardinality: [0 - 10]
├── key: (3)
├── fd: (1)-->(2), (3)-->(4,5), (1)==(5), (5)==(1)
├── inner-join (hash)
│ ├── columns: u:1!null uv.v:2 k:3!null kvr_fk.v:4 r:5!null
│ ├── multiplicity: left-rows(zero-or-more), right-rows(exactly-one)
│ ├── key: (3)
│ ├── fd: (1)-->(2), (3)-->(4,5), (1)==(5), (5)==(1)
│ ├── limit hint: 10.00
│ ├── scan uv
│ │ ├── columns: u:1!null uv.v:2
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2)
│ ├── limit
│ │ ├── columns: k:3!null kvr_fk.v:4 r:5!null
│ │ ├── cardinality: [0 - 10]
│ │ ├── key: (3)
│ │ ├── fd: (3)-->(4,5)
│ │ ├── scan kvr_fk
│ │ │ ├── columns: k:3!null kvr_fk.v:4 r:5!null
│ │ │ ├── key: (3)
│ │ │ ├── fd: (3)-->(4,5)
│ │ │ └── limit hint: 10.00
│ │ └── 10
│ └── filters
│ └── u:1 = r:5 [outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
└── 10
# Ordering can be pushed down.
norm expect=PushLimitIntoLeftJoin
norm expect=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u ORDER BY a LIMIT 10
----
limit
......@@ -800,7 +873,7 @@ limit
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── 10
norm expect=PushLimitIntoLeftJoin
norm expect=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u ORDER BY b LIMIT 10
----
limit
......@@ -848,7 +921,7 @@ limit
# Ordering on u is not equivalent to ordering on a because of NULLs; it cannot
# be pushed down.
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u ORDER BY u LIMIT 10
----
limit
......@@ -882,7 +955,7 @@ limit
└── 10
# Ordering cannot be pushed down.
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u ORDER BY v LIMIT 10
----
limit
......@@ -915,7 +988,7 @@ limit
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── 10
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON b = v ORDER BY a, v LIMIT 10
----
limit
......@@ -948,7 +1021,7 @@ limit
│ └── b:2 = v:4 [outer=(2,4), constraints=(/2: (/NULL - ]; /4: (/NULL - ]), fd=(2)==(4), (4)==(2)]
└── 10
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u ORDER BY u, b LIMIT 10
----
limit
......@@ -983,7 +1056,7 @@ limit
# Rule should not fire if the input's cardinality is already less than the
# limit.
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM (SELECT * FROM ab LIMIT 5) LEFT JOIN uv ON a = u LIMIT 10
----
limit
......@@ -1017,7 +1090,7 @@ limit
└── 10
# Push the limit even if the input is already limited (but with a higher limit).
norm expect=PushLimitIntoLeftJoin
norm expect=PushLimitIntoJoinLeft
SELECT * FROM (SELECT * FROM ab LIMIT 20) LEFT JOIN uv ON a = u LIMIT 10
----
limit
......@@ -1058,7 +1131,7 @@ limit
└── 10
# Don't push negative limits (or we would enter an infinite loop).
norm expect-not=PushLimitIntoLeftJoin
norm expect-not=PushLimitIntoJoinLeft
SELECT * FROM ab LEFT JOIN uv ON a = u LIMIT -1
----
limit
......@@ -1084,3 +1157,91 @@ limit
│ └── filters
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── -1
# Don't push limits into an inner join that may not preserve rows.
norm expect-not=(PushLimitIntoJoinLeft,PushLimitIntoJoinRight)
SELECT * FROM ab INNER JOIN uv ON a = u LIMIT 10
----
limit
├── columns: a:1!null b:2 u:3!null v:4
├── cardinality: [0 - 10]
├── key: (3)
├── fd: (1)-->(2), (3)-->(4), (1)==(3), (3)==(1)
├── inner-join (hash)
│ ├── columns: a:1!null b:2 u:3!null v:4
│ ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-one)
│ ├── key: (3)
│ ├── fd: (1)-->(2), (3)-->(4), (1)==(3), (3)==(1)
│ ├── limit hint: 10.00
│ ├── scan ab
│ │ ├── columns: a:1!null b:2
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2)
│ ├── scan uv
│ │ ├── columns: u:3!null v:4
│ │ ├── key: (3)
│ │ └── fd: (3)-->(4)
│ └── filters
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── 10
# Don't push a limit into the right side of a LeftJoin.
norm expect-not=PushLimitIntoJoinRight
SELECT * FROM uv LEFT JOIN kvr_fk ON u = r LIMIT 10
----
limit
├── columns: u:1!null v:2 k:3 v:4 r:5
├── cardinality: [0 - 10]
├── key: (1,3)
├── fd: (1)-->(2), (3)-->(4,5)
├── left-join (hash)
│ ├── columns: u:1!null uv.v:2 k:3 kvr_fk.v:4 r:5
│ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-one)
│ ├── key: (1,3)
│ ├── fd: (1)-->(2), (3)-->(4,5)
│ ├── limit hint: 10.00
│ ├── limit
│ │ ├── columns: u:1!null uv.v:2
│ │ ├── cardinality: [0 - 10]
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── scan uv
│ │ │ ├── columns: u:1!null uv.v:2
│ │ │ ├── key: (1)
│ │ │ ├── fd: (1)-->(2)
│ │ │ └── limit hint: 10.00
│ │ └── 10
│ ├── scan kvr_fk
│ │ ├── columns: k:3!null kvr_fk.v:4 r:5!null
│ │ ├── key: (3)
│ │ └── fd: (3)-->(4,5)
│ └── filters
│ └── u:1 = r:5 [outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
└── 10
# Don't push a limit into either side of a FullJoin.
norm expect-not=(PushLimitIntoJoinLeft,PushLimitIntoJoinRight)
SELECT * FROM ab FULL JOIN uv ON a = u LIMIT 10
----
limit
├── columns: a:1 b:2 u:3 v:4
├── cardinality: [0 - 10]
├── key: (1,3)
├── fd: (1)-->(2), (3)-->(4)
├── full-join (hash)
│ ├── columns: a:1 b:2 u:3 v:4
│ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one)
│ ├── key: (1,3)
│ ├── fd: (1)-->(2), (3)-->(4)
│ ├── limit hint: 10.00
│ ├── scan ab
│ │ ├── columns: a:1!null b:2
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2)
│ ├── scan uv
│ │ ├── columns: u:3!null v:4
│ │ ├── key: (3)
│ │ └── fd: (3)-->(4)
│ └── filters
│ └── a:1 = u:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)]
└── 10
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment