Commit c3bdc350 authored by Yahor Yuzefovich

colexec: improve materializer unit tests

This commit introduces a separate benchmark for the materializer and
improves the columnarize-materialize test to use random types. The
latter change allows us to remove another unit test because it has
become redundant.

Release note: None
parent 7fa77eb4
......@@ -12,27 +12,32 @@ package colexec
import (
"context"
"fmt"
"testing"
"unsafe"
"github.com/cockroachdb/apd/v2"
"github.com/cockroachdb/cockroach/pkg/col/coldata"
"github.com/cockroachdb/cockroach/pkg/col/coldatatestutils"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)
func TestColumnarizeMaterialize(t *testing.T) {
defer leaktest.AfterTest(t)()
// TODO(jordan,asubiotto): add randomness to this test as more types are supported.
typs := []*types.T{types.Int, types.Int}
rng, _ := randutil.NewPseudoRand()
nCols := 1 + rng.Intn(4)
var typs []*types.T
for len(typs) < nCols {
typs = append(typs, sqlbase.RandType(rng))
}
nRows := 10000
nCols := 2
rows := sqlbase.MakeIntRows(nRows, nCols)
rows := sqlbase.RandEncDatumRowsOfTypes(rng, nRows, typs)
input := execinfra.NewRepeatableRowSource(typs, rows)
ctx := context.Background()
......@@ -72,7 +77,7 @@ func TestColumnarizeMaterialize(t *testing.T) {
if row == nil {
t.Fatal("unexpected nil row")
}
for j := 0; j < nCols; j++ {
for j := range typs {
if row[j].Datum.Compare(&evalCtx, rows[i][j].Datum) != 0 {
t.Fatal("unequal rows", row, rows[i])
}
......@@ -87,35 +92,7 @@ func TestColumnarizeMaterialize(t *testing.T) {
}
}
func TestMaterializeTypes(t *testing.T) {
defer leaktest.AfterTest(t)()
// TODO(andyk): Make sure to add more types here. Consider iterating over
// types.OidToTypes list and also using randomly generated EncDatums.
typs := []*types.T{
types.Bool,
types.Int,
types.Float,
types.Decimal,
types.Date,
types.String,
types.Bytes,
types.Name,
types.Oid,
}
inputRow := sqlbase.EncDatumRow{
sqlbase.EncDatum{Datum: tree.DBoolTrue},
sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(31))},
sqlbase.EncDatum{Datum: tree.NewDFloat(37.41)},
sqlbase.EncDatum{Datum: &tree.DDecimal{Decimal: *apd.New(43, 47)}},
sqlbase.EncDatum{Datum: tree.NewDDate(pgdate.MakeCompatibleDateFromDisk(53))},
sqlbase.EncDatum{Datum: tree.NewDString("hello")},
sqlbase.EncDatum{Datum: tree.NewDBytes("ciao")},
sqlbase.EncDatum{Datum: tree.NewDName("aloha")},
sqlbase.EncDatum{Datum: tree.NewDOid(59)},
}
input := execinfra.NewRepeatableRowSource(typs, sqlbase.EncDatumRows{inputRow})
func BenchmarkMaterializer(b *testing.B) {
ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
evalCtx := tree.MakeTestingEvalContext(st)
......@@ -124,43 +101,76 @@ func TestMaterializeTypes(t *testing.T) {
Cfg: &execinfra.ServerConfig{Settings: st},
EvalCtx: &evalCtx,
}
c, err := NewColumnarizer(ctx, testAllocator, flowCtx, 0, input)
if err != nil {
t.Fatal(err)
}
outputToInputColIdx := make([]int, len(typs))
for i := range outputToInputColIdx {
outputToInputColIdx[i] = i
}
m, err := NewMaterializer(
flowCtx,
1, /* processorID */
c,
typs,
nil, /* output */
nil, /* metadataSourcesQueue */
nil, /* toClose */
nil, /* outputStatsToTrace */
nil, /* cancelFlow */
)
if err != nil {
t.Fatal(err)
}
m.Start(ctx)
row, meta := m.Next()
if meta != nil {
t.Fatalf("unexpected meta %+v", meta)
}
if row == nil {
t.Fatal("unexpected nil row")
}
for i := range inputRow {
inDatum := inputRow[i].Datum
outDatum := row[i].Datum
if inDatum.Compare(&evalCtx, outDatum) != 0 {
t.Fatal("unequal datums", inDatum, outDatum)
rng, _ := randutil.NewPseudoRand()
nBatches := 10
nRows := nBatches * coldata.BatchSize()
for _, typ := range []*types.T{types.Int, types.Float, types.Bytes} {
typs := []*types.T{typ}
nCols := len(typs)
for _, hasNulls := range []bool{false, true} {
for _, useSelectionVector := range []bool{false, true} {
b.Run(fmt.Sprintf("%s/hasNulls=%t/useSel=%t", typ, hasNulls, useSelectionVector), func(b *testing.B) {
nullProb := 0.0
if hasNulls {
nullProb = nullProbability
}
batch := testAllocator.NewMemBatch(typs)
for _, colVec := range batch.ColVecs() {
coldatatestutils.RandomVec(coldatatestutils.RandomVecArgs{
Rand: rng,
Vec: colVec,
N: coldata.BatchSize(),
NullProbability: nullProb,
BytesFixedLength: 8,
})
}
batch.SetLength(coldata.BatchSize())
if useSelectionVector {
batch.SetSelection(true)
sel := batch.Selection()
for i := 0; i < coldata.BatchSize(); i++ {
sel[i] = i
}
}
input := newFiniteBatchSource(batch, typs, nBatches)
b.SetBytes(int64(nRows * nCols * int(unsafe.Sizeof(int64(0)))))
for i := 0; i < b.N; i++ {
m, err := NewMaterializer(
flowCtx,
0, /* processorID */
input,
typs,
nil, /* output */
nil, /* metadataSourcesQueue */
nil, /* toClose */
nil, /* outputStatsToTrace */
nil, /* cancelFlow */
)
if err != nil {
b.Fatal(err)
}
m.Start(ctx)
foundRows := 0
for {
row, meta := m.Next()
if meta != nil {
b.Fatalf("unexpected metadata %v", meta)
}
if row == nil {
break
}
foundRows++
}
if foundRows != nRows {
b.Fatalf("expected %d rows, found %d", nRows, foundRows)
}
input.reset(nBatches)
}
})
}
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment