Commit 37686810 authored by Drew Kimball

opt: modify ConvertZipArraysToValues to handle json_array_elements

Previously, ConvertZipArraysToValues only handled cases where the unnest
function was called on either an ArrayExpr or a ConstExpr wrapping a
DArray.

This patch allows ConvertZipArraysToValues to also handle the
json_array_elements and jsonb_array_elements functions if they have
a single ConstExpr parameter that wraps a DJSON datum.

A query like this:
```
SELECT unnest(ARRAY[1,2,3,4,5]), json_array_elements('[{"a": "b"}, {"x": "y"}]');
```
will now be unnested into this:
```
values
 ├── columns: unnest:1!null json_array_elements:2
 ├── cardinality: [5 - 5]
 ├── (1, '{"a": "b"}')
 ├── (2, '{"x": "y"}')
 ├── (3, NULL)
 ├── (4, NULL)
 └── (5, NULL)
```

Release note: None
parent 62f32ca9
......@@ -1170,7 +1170,7 @@ func (c *CustomFuncs) ProjectExtraCol(
//
// 3. There is at least one row.
//
// 4. All tuples in the single column are either TupleExpr's or ConstExpr's
// 4. All tuples in the single column are either TupleExprs or ConstExprs
// that wrap DTuples, as opposed to dynamically generated tuples.
//
func (c *CustomFuncs) CanUnnestTuplesFromValues(expr memo.RelExpr) bool {
......@@ -1707,22 +1707,34 @@ func (c *CustomFuncs) ZipOuterCols(zip memo.ZipExpr) opt.ColSet {
return colSet
}
// unnestFuncs is the set of function names that ConvertZipArraysToValues
// supports. Only key membership is meaningful; the empty-struct values carry
// no information.
var unnestFuncs = map[string]struct{}{
	"json_array_elements":  {},
	"jsonb_array_elements": {},
	"unnest":               {},
}
// CanConstructValuesFromZips takes in an input ZipExpr and returns true if the
// ProjectSet to which the zip belongs can be converted to an InnerJoinApply
// with a Values operator on the right input.
func (c *CustomFuncs) CanConstructValuesFromZips(zip memo.ZipExpr) bool {
for _, zipItem := range zip {
fn, ok := zipItem.Fn.(*memo.FunctionExpr)
if !ok || fn.Name != "unnest" {
// Not an unnest function.
for i := range zip {
fn, ok := zip[i].Fn.(*memo.FunctionExpr)
if !ok {
// Not a FunctionExpr.
return false
}
if _, ok := unnestFuncs[fn.Name]; !ok {
// Not a supported function.
return false
}
if len(fn.Args) != 1 {
// Unnest has more than one argument.
// Function has more than one argument.
return false
}
if !c.IsStaticArray(fn.Args[0]) {
// Unnest argument is not an ArrayExpr or ConstExpr wrapping a DArray.
if !c.IsStaticArray(fn.Args[0]) && !c.WrapsJSONArray(fn.Args[0]) {
// Argument is not an ArrayExpr or ConstExpr wrapping a DArray or DJSON.
return false
}
}
......@@ -1730,12 +1742,14 @@ func (c *CustomFuncs) CanConstructValuesFromZips(zip memo.ZipExpr) bool {
}
// ConstructValuesFromZips constructs a Values operator with the elements from
// the given ArrayExpr(s) (or ConstExpr(s) that wrap a DArray) in the given
// ZipExpr.
//
// The functions contained in the ZipExpr must be unnest functions with a single
// parameter each. The parameters of the unnest functions must be either
// ArrayExpr's or ConstExpr's wrapping DArrays.
// the given ArrayExpr(s) or the ConstExpr(s) that wrap a DArray or DJSON in the
// given ZipExpr.
//
// The functions contained in the ZipExpr must be unnest, json_array_elements or
// jsonb_array_elements functions with a single parameter each. The parameters
// of the unnest functions must be either ArrayExprs or ConstExprs wrapping
// DArrays. The parameters of the json_array_elements functions must be
// ConstExprs wrapping DJSON datums.
func (c *CustomFuncs) ConstructValuesFromZips(zip memo.ZipExpr) memo.RelExpr {
numCols := len(zip)
outColTypes := make([]*types.T, numCols)
......@@ -1743,17 +1757,27 @@ func (c *CustomFuncs) ConstructValuesFromZips(zip memo.ZipExpr) memo.RelExpr {
var outRows []memo.ScalarListExpr
// Get type and ColumnID of each column.
for i, zipItem := range zip {
arrExpr := zipItem.Fn.(*memo.FunctionExpr).Args[0]
outColTypes[i] = arrExpr.DataType().ArrayContents()
outColIDs[i] = zipItem.Cols[0]
for i := range zip {
function := zip[i].Fn.(*memo.FunctionExpr)
expr := function.Args[0]
switch function.Name {
case "unnest":
outColTypes[i] = expr.DataType().ArrayContents()
case "json_array_elements", "jsonb_array_elements":
outColTypes[i] = types.Jsonb
default:
panic(errors.AssertionFailedf("invalid function name: %v", function.Name))
}
outColIDs[i] = zip[i].Cols[0]
}
// addValToOutRows inserts a value into outRows at the given index.
addValToOutRows := func(expr opt.ScalarExpr, rIndex, cIndex int) {
if rIndex >= len(outRows) {
// If this is the largest column encountered so far, make a new row and
// fill with NullExpr's.
// fill with NullExprs.
outRows = append(outRows, make(memo.ScalarListExpr, numCols))
for i := 0; i < numCols; i++ {
outRows[rIndex][i] = c.f.ConstructNull(outColTypes[cIndex])
......@@ -1763,24 +1787,55 @@ func (c *CustomFuncs) ConstructValuesFromZips(zip memo.ZipExpr) memo.RelExpr {
}
// Fill outRows with values from the arrays in the ZipExpr.
for i, zipItem := range zip {
arrExpr := zipItem.Fn.(*memo.FunctionExpr).Args[0]
switch t := arrExpr.(type) {
for i := range zip {
function := zip[i].Fn.(*memo.FunctionExpr)
param := function.Args[0]
switch t := param.(type) {
case *memo.ArrayExpr:
for j, val := range t.Elems {
addValToOutRows(val, j, i)
}
case *memo.ConstExpr:
dArray := t.Value.(*tree.DArray)
for j, elem := range dArray.Array {
val := c.f.ConstructConstVal(elem, dArray.ParamTyp)
// Use a ValueGenerator to retrieve values from the datums wrapped
// in the ConstExpr. These generators are used at runtime to unnest
// values from regular and JSON arrays.
generator, err := function.Overload.Generator(c.f.evalCtx, tree.Datums{t.Value})
if err != nil {
panic(errors.AssertionFailedf("generator retrieval failed: %v", err))
}
if err = generator.Start(c.f.evalCtx.Context, c.f.evalCtx.Txn); err != nil {
panic(errors.AssertionFailedf("generator.Start failed: %v", err))
}
for j := 0; ; j++ {
hasNext, err := generator.Next(c.f.evalCtx.Context)
if err != nil {
panic(errors.AssertionFailedf("generator.Next failed: %v", err))
}
if !hasNext {
break
}
vals, err := generator.Values()
if err != nil {
panic(errors.AssertionFailedf("failed to retrieve values: %v", err))
}
if len(vals) != 1 {
panic(errors.AssertionFailedf(
"ValueGenerator didn't return exactly one value: %v", log.Safe(vals)))
}
val := c.f.ConstructConstVal(vals[0], vals[0].ResolvedType())
addValToOutRows(val, j, i)
}
generator.Close()
default:
panic(errors.AssertionFailedf("invalid parameter type"))
}
}
// Convert outRows (a slice of ScalarListExpr's) into a ScalarListExpr
// Convert outRows (a slice of ScalarListExprs) into a ScalarListExpr
// containing a tuple for each row.
tuples := make(memo.ScalarListExpr, len(outRows))
for i, row := range outRows {
......@@ -2204,6 +2259,17 @@ func (c *CustomFuncs) IsJSONScalar(value opt.ScalarExpr) bool {
return v.JSON.Type() != json.ObjectJSONType && v.JSON.Type() != json.ArrayJSONType
}
// WrapsJSONArray reports whether expr is a ConstExpr whose wrapped datum is a
// DJSON holding a JSON array. Any other expression kind, datum kind, or JSON
// type yields false.
func (c *CustomFuncs) WrapsJSONArray(expr opt.ScalarExpr) bool {
	constant, isConst := expr.(*memo.ConstExpr)
	if !isConst {
		// Not a constant expression.
		return false
	}
	datum, isJSON := constant.Value.(*tree.DJSON)
	if !isJSON {
		// The constant does not wrap a JSON datum.
		return false
	}
	return datum.JSON.Type() == json.ArrayJSONType
}
// MakeSingleKeyJSONObject returns a JSON object with one entry, mapping key to value.
func (c *CustomFuncs) MakeSingleKeyJSONObject(key, value opt.ScalarExpr) opt.ScalarExpr {
k := key.(*memo.ConstExpr).Value.(*tree.DString)
......
......@@ -2,12 +2,15 @@
# project_set.opt contains normalization rules for the ProjectSet operator.
# =============================================================================
# ConvertZipArraysToValues applies the unnest zip function to array inputs,
# converting them into a Values operator within an InnerJoinApply. This allows
# Values and decorrelation rules to fire. It is especially useful in cases where
# the array contents are passed as a PREPARE parameter, such as:
# ConvertZipArraysToValues applies the unnest, json_array_elements and
# jsonb_array_elements zip functions to array inputs, converting them into a
# Values operator within an InnerJoinApply. This allows Values and decorrelation
# rules to fire. It is especially useful in cases where the contents are passed
# as a PREPARE parameter, such as:
#
# SELECT * FROM xy WHERE y IN unnest($1)
# or:
# SELECT json_array_elements($1)
#
# The replace pattern is equivalent to the match pattern because the
# InnerJoinApply outputs every value in the array for every row in the input,
......
This diff is collapsed.
......@@ -542,31 +542,24 @@ WHERE
----
project
├── columns: secondary_id:6 "?column?":7
├── side-effects
├── inner-join (lookup idtable)
│ ├── columns: primary_id:1!null idtable.secondary_id:2!null data:3!null value:4 column5:5!null
│ ├── columns: primary_id:1!null idtable.secondary_id:2!null data:3!null value:4!null column5:5!null
│ ├── key columns: [1] = [1]
│ ├── lookup columns are key
│ ├── side-effects
│ ├── fd: (1)-->(2,3), (4)-->(5), (2)==(5), (5)==(2)
│ ├── inner-join (lookup idtable@secondary_id)
│ │ ├── columns: primary_id:1!null idtable.secondary_id:2!null value:4 column5:5!null
│ │ ├── columns: primary_id:1!null idtable.secondary_id:2!null value:4!null column5:5!null
│ │ ├── key columns: [5] = [2]
│ │ ├── side-effects
│ │ ├── fd: (4)-->(5), (1)-->(2), (2)==(5), (5)==(2)
│ │ ├── project
│ │ │ ├── columns: column5:5 value:4
│ │ │ ├── side-effects
│ │ │ ├── columns: column5:5 value:4!null
│ │ │ ├── cardinality: [2 - 2]
│ │ │ ├── fd: (4)-->(5)
│ │ │ ├── project-set
│ │ │ │ ├── columns: value:4
│ │ │ │ ├── side-effects
│ │ │ │ ├── values
│ │ │ │ │ ├── cardinality: [1 - 1]
│ │ │ │ │ ├── key: ()
│ │ │ │ │ └── ()
│ │ │ │ └── zip
│ │ │ │ └── json_array_elements('[{"person_id": "8e5dc104-9f38-4255-9283-fd080be16c57", "product_id": "a739c2d3-edec-413b-88d8-9c31d0414b1e"}, {"person_id": "308686c4-7415-4c2d-92d5-25b39a1c84e2", "product_id": "3f12802d-5b0f-43d7-a0d0-12ac8e88cb18"}]') [side-effects]
│ │ │ ├── values
│ │ │ │ ├── columns: value:4!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── ('{"person_id": "8e5dc104-9f38-4255-9283-fd080be16c57", "product_id": "a739c2d3-edec-413b-88d8-9c31d0414b1e"}',)
│ │ │ │ └── ('{"person_id": "308686c4-7415-4c2d-92d5-25b39a1c84e2", "product_id": "3f12802d-5b0f-43d7-a0d0-12ac8e88cb18"}',)
│ │ │ └── projections
│ │ │ └── (value:4->>'secondary_id')::UUID [as=column5:5, outer=(4)]
│ │ └── filters (true)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment