...
 
Commits (4)
  • Yahor Yuzefovich's avatar
    colexec: fix performance inefficiency in materializer · ef4768e1
    Yahor Yuzefovich authored
    We were mistakenly passing `sqlbase.DatumAlloc` by value rather than by
    pointer, so each call worked on its own copy of the allocator: it would
    always allocate 16 datums but use only 1. In other words, we were not
    only failing to pool the allocations, but were also making a bunch of
    useless allocations.
    
    This inefficiency becomes noticeable both when the vectorized query
    returns many rows and when wrapped processors receive a lot of input
    rows - i.e. in all cases where we need to materialize a lot of rows.
    For example, TPC-H query 16 sees about 10% improvement (it returns 18k
    rows) and TPC-DS query 6 sees 2x improvement (it has wrapped hash
    aggregator with a decimal column) with this fix.
    
    Release note (performance improvement): A performance inefficiency in
    the vectorized execution engine has been fixed. The fix speeds up all
    queries run via the vectorized engine, with the most noticeable gains
    on queries that output many rows.
    ef4768e1
  • Yahor Yuzefovich's avatar
    colexec: fix type schema for LEFT SEMI and LEFT ANTI joins · a778474c
    Yahor Yuzefovich authored
    LEFT SEMI and LEFT ANTI joins output only the columns from the left
    side; however, we mistakenly also put the types of the columns from the
    right side into `result.ColumnTypes`.
    
    Release note (bug fix): Previously, CockroachDB could in some cases
    encounter an internal error when a query with a LEFT SEMI or LEFT ANTI
    join was executed via the vectorized execution engine. This has now
    been fixed. The error was likely to occur only with the `vectorize=on`
    setting.
    a778474c
  • yuzefovich's avatar
    Merge pull request #48732 from yuzefovich/backport20.1-48669 · a0d8007e
    yuzefovich authored
    release-20.1: colexec: fix performance inefficiency in materializer
    a0d8007e
  • yuzefovich's avatar
    Merge pull request #48751 from yuzefovich/backport20.1-48659 · 28836d0a
    yuzefovich authored
    release-20.1: colexec: fix type schema for LEFT SEMI and LEFT ANTI joins
    28836d0a
......@@ -67,7 +67,7 @@ func (b *defaultBuiltinFuncOperator) Next(ctx context.Context) coldata.Batch {
for j := range b.argumentCols {
col := batch.ColVec(b.argumentCols[j])
b.row[j] = PhysicalTypeColElemToDatum(col, rowIdx, b.da, &b.columnTypes[b.argumentCols[j]])
b.row[j] = PhysicalTypeColElemToDatum(col, rowIdx, &b.da, &b.columnTypes[b.argumentCols[j]])
hasNulls = hasNulls || b.row[j] == tree.DNull
}
......
......@@ -896,7 +896,7 @@ func (rf *cFetcher) pushState(state fetcherState) {
// getDatumAt returns the converted datum object at the given (colIdx, rowIdx).
// This function is meant for tracing and should not be used in hot paths.
func (rf *cFetcher) getDatumAt(colIdx int, rowIdx int, typ types.T) tree.Datum {
return PhysicalTypeColElemToDatum(rf.machine.colvecs[colIdx], rowIdx, rf.table.da, &typ)
return PhysicalTypeColElemToDatum(rf.machine.colvecs[colIdx], rowIdx, &rf.table.da, &typ)
}
// processValue processes the state machine's current value component, setting
......
......@@ -883,7 +883,12 @@ func NewColOperator(
}
result.ColumnTypes = make([]types.T, len(leftLogTypes)+len(rightLogTypes))
copy(result.ColumnTypes, leftLogTypes)
copy(result.ColumnTypes[len(leftLogTypes):], rightLogTypes)
if core.HashJoiner.Type == sqlbase.JoinType_LEFT_SEMI ||
core.HashJoiner.Type == sqlbase.JoinType_LEFT_ANTI {
result.ColumnTypes = result.ColumnTypes[:len(leftLogTypes):len(leftLogTypes)]
} else {
copy(result.ColumnTypes[len(leftLogTypes):], rightLogTypes)
}
if !core.HashJoiner.OnExpr.Empty() && core.HashJoiner.Type == sqlbase.JoinType_INNER {
if err = result.planAndMaybeWrapOnExprAsFilter(ctx, flowCtx, core.HashJoiner.OnExpr, streamingMemAccount, processorConstructor); err != nil {
......@@ -948,6 +953,12 @@ func NewColOperator(
result.ToClose = append(result.ToClose, mj.(IdempotentCloser))
result.ColumnTypes = make([]types.T, len(leftLogTypes)+len(rightLogTypes))
copy(result.ColumnTypes, leftLogTypes)
if core.MergeJoiner.Type == sqlbase.JoinType_LEFT_SEMI ||
core.MergeJoiner.Type == sqlbase.JoinType_LEFT_ANTI {
result.ColumnTypes = result.ColumnTypes[:len(leftLogTypes):len(leftLogTypes)]
} else {
copy(result.ColumnTypes[len(leftLogTypes):], rightLogTypes)
}
copy(result.ColumnTypes[len(leftLogTypes):], rightLogTypes)
if onExpr != nil {
......
......@@ -172,7 +172,7 @@ func (m *Materializer) next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadat
typs := m.OutputTypes()
for colIdx := 0; colIdx < len(typs); colIdx++ {
col := m.batch.ColVec(colIdx)
m.row[colIdx].Datum = PhysicalTypeColElemToDatum(col, rowIdx, m.da, &typs[colIdx])
m.row[colIdx].Datum = PhysicalTypeColElemToDatum(col, rowIdx, &m.da, &typs[colIdx])
}
return m.ProcessRowHelper(m.row), nil
}
......
......@@ -252,8 +252,8 @@ func TestRandomComparisons(t *testing.T) {
coldata.RandomVec(rng, typ, bytesFixedLength, lVec, numTuples, 0)
coldata.RandomVec(rng, typ, bytesFixedLength, rVec, numTuples, 0)
for i := range lDatums {
lDatums[i] = PhysicalTypeColElemToDatum(lVec, i, da, ct)
rDatums[i] = PhysicalTypeColElemToDatum(rVec, i, da, ct)
lDatums[i] = PhysicalTypeColElemToDatum(lVec, i, &da, ct)
rDatums[i] = PhysicalTypeColElemToDatum(rVec, i, &da, ct)
}
for _, cmpOp := range []tree.ComparisonOperator{tree.EQ, tree.NE, tree.LT, tree.LE, tree.GT, tree.GE} {
for i := range lDatums {
......
......@@ -29,7 +29,7 @@ import (
// that this function handles nulls as well, so there is no need for a separate
// null check.
func PhysicalTypeColElemToDatum(
col coldata.Vec, rowIdx int, da sqlbase.DatumAlloc, ct *types.T,
col coldata.Vec, rowIdx int, da *sqlbase.DatumAlloc, ct *types.T,
) tree.Datum {
if col.MaybeHasNulls() {
if col.Nulls().NullAt(rowIdx) {
......
......@@ -58,3 +58,14 @@ SELECT * FROM t44798_0 NATURAL JOIN t44798_1
----
0
2
# Regression test for batch type schema prefix mismatch after LEFT ANTI join
# (48622).
statement ok
CREATE TABLE l (l INT PRIMARY KEY); INSERT INTO l VALUES (1), (2);
CREATE TABLE r (r INT PRIMARY KEY); INSERT INTO r VALUES (1)
query IB
SELECT *, true FROM (SELECT l FROM l WHERE l NOT IN (SELECT r FROM r))
----
2 true