Skip to content

Commit

Permalink
Optimize array_constructor - v1 (facebookincubator#6566)
Browse files Browse the repository at this point in the history
Summary:

array_constructor is very slow: facebookincubator#5958 (comment)

array_constructor uses BaseVector::copyRanges, which is somewhat fast for arrays and maps, but very slow for primitive types:

```
FlatVector.h

  void copyRanges(
      const BaseVector* source,
      const folly::Range<const BaseVector::CopyRange*>& ranges) override {
    for (auto& range : ranges) {
      copy(source, range.targetIndex, range.sourceIndex, range.count);
    }
  }
```

FlatVector<T>::copy(source, rows, toSourceRow) is faster.

Switching from copyRanges to copy speeds up array_constructor for primitive types and structs significantly. Yet, this change makes arrays and maps slower.

The slowness is due to ArrayVector and MapVector not having an implementation of copy(source, rows, toSourceRow). They rely on BaseVector::copy to translate rows + toSourceRow into ranges. This extra processing causes a performance regression.

Hence, we use copy for primitive types and structs of these and copyRanges for everything else.

We also optimize FlatVector::copyRanges (which is used by Array/MapVector::copyRanges).

```
Before:

array_constructor_ARRAY_nullfree##1                        16.80ms     59.53
array_constructor_ARRAY_nullfree##2                        27.02ms     37.01
array_constructor_ARRAY_nullfree##3                        38.03ms     26.30
array_constructor_ARRAY_nullfree##2_null                   52.86ms     18.92
array_constructor_ARRAY_nullfree##2_const                  54.97ms     18.19
array_constructor_ARRAY_nulls##1                           30.61ms     32.66
array_constructor_ARRAY_nulls##2                           55.01ms     18.18
array_constructor_ARRAY_nulls##3                           80.69ms     12.39
array_constructor_ARRAY_nulls##2_null                      69.10ms     14.47
array_constructor_ARRAY_nulls##2_const                    103.85ms      9.63


After:

array_constructor_ARRAY_nullfree##1                        15.43ms     64.80
array_constructor_ARRAY_nullfree##2                        24.50ms     40.81
array_constructor_ARRAY_nullfree##3                        35.12ms     28.47
array_constructor_ARRAY_nullfree##2_null                   54.52ms     18.34
array_constructor_ARRAY_nullfree##2_const                  43.28ms     23.10
array_constructor_ARRAY_nulls##1                           28.60ms     34.96
array_constructor_ARRAY_nulls##2                           50.82ms     19.68
array_constructor_ARRAY_nulls##3                           70.31ms     14.22
array_constructor_ARRAY_nulls##2_null                      64.43ms     15.52
array_constructor_ARRAY_nulls##2_const                     80.71ms     12.39


Before:

array_constructor_INTEGER_nullfree##1                      19.72ms     50.71
array_constructor_INTEGER_nullfree##2                      34.51ms     28.97
array_constructor_INTEGER_nullfree##3                      47.95ms     20.86
array_constructor_INTEGER_nullfree##2_null                 58.68ms     17.04
array_constructor_INTEGER_nullfree##2_const                45.15ms     22.15
array_constructor_INTEGER_nulls##1                         29.99ms     33.34
array_constructor_INTEGER_nulls##2                         55.32ms     18.08
array_constructor_INTEGER_nulls##3                         78.53ms     12.73
array_constructor_INTEGER_nulls##2_null                    72.24ms     13.84
array_constructor_INTEGER_nulls##2_const                   71.13ms     14.06


After:

array_constructor_INTEGER_nullfree##1                       3.49ms    286.59
array_constructor_INTEGER_nullfree##2                       7.91ms    126.46
array_constructor_INTEGER_nullfree##3                      11.99ms     83.41
array_constructor_INTEGER_nullfree##2_null                 12.57ms     79.55
array_constructor_INTEGER_nullfree##2_const                11.03ms     90.67
array_constructor_INTEGER_nulls##1                          4.37ms    228.97
array_constructor_INTEGER_nulls##2                          9.99ms    100.14
array_constructor_INTEGER_nulls##3                         14.79ms     67.60
array_constructor_INTEGER_nulls##2_null                    12.21ms     81.92
array_constructor_INTEGER_nulls##2_const                   12.64ms     79.12


Before:

array_constructor_MAP_nullfree##1                          17.34ms     57.65
array_constructor_MAP_nullfree##2                          29.84ms     33.51
array_constructor_MAP_nullfree##3                          41.51ms     24.09
array_constructor_MAP_nullfree##2_null                     56.57ms     17.68
array_constructor_MAP_nullfree##2_const                    71.68ms     13.95
array_constructor_MAP_nulls##1                             36.22ms     27.61
array_constructor_MAP_nulls##2                             68.18ms     14.67
array_constructor_MAP_nulls##3                             95.12ms     10.51
array_constructor_MAP_nulls##2_null                        86.42ms     11.57
array_constructor_MAP_nulls##2_const                      120.10ms      8.33


After:

array_constructor_MAP_nullfree##1                          17.38ms     57.53
array_constructor_MAP_nullfree##2                          29.41ms     34.00
array_constructor_MAP_nullfree##3                          38.30ms     26.11
array_constructor_MAP_nullfree##2_null                     58.52ms     17.09
array_constructor_MAP_nullfree##2_const                    48.62ms     20.57
array_constructor_MAP_nulls##1                             30.60ms     32.68
array_constructor_MAP_nulls##2                             53.94ms     18.54
array_constructor_MAP_nulls##3                             86.48ms     11.56
array_constructor_MAP_nulls##2_null                        69.53ms     14.38
array_constructor_MAP_nulls##2_const                       87.56ms     11.42


Before:

array_constructor_ROW_nullfree##1                          33.88ms     29.52
array_constructor_ROW_nullfree##2                          62.00ms     16.13
array_constructor_ROW_nullfree##3                          89.54ms     11.17
array_constructor_ROW_nullfree##2_null                     78.46ms     12.75
array_constructor_ROW_nullfree##2_const                    95.53ms     10.47
array_constructor_ROW_nulls##1                             44.11ms     22.67
array_constructor_ROW_nulls##2                            115.43ms      8.66
array_constructor_ROW_nulls##3                            173.61ms      5.76
array_constructor_ROW_nulls##2_null                       130.40ms      7.67
array_constructor_ROW_nulls##2_const                      169.97ms      5.88

After:

array_constructor_ROW_nullfree##1                           5.64ms    177.44
array_constructor_ROW_nullfree##2                          14.40ms     69.44
array_constructor_ROW_nullfree##3                          21.46ms     46.59
array_constructor_ROW_nullfree##2_null                     19.14ms     52.26
array_constructor_ROW_nullfree##2_const                    18.60ms     53.77
array_constructor_ROW_nulls##1                             10.97ms     91.18
array_constructor_ROW_nulls##2                             18.29ms     54.67
array_constructor_ROW_nulls##3                             28.57ms     35.01
array_constructor_ROW_nulls##2_null                        25.10ms     39.84
array_constructor_ROW_nulls##2_const                       24.55ms     40.74
```

Differential Revision: D49269500
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Sep 15, 2023
1 parent c513fd4 commit 9a61532
Show file tree
Hide file tree
Showing 6 changed files with 332 additions and 158 deletions.
83 changes: 69 additions & 14 deletions velox/functions/prestosql/ArrayConstructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,55 @@ class ArrayConstructor : public exec::VectorFunction {
} else {
elementsResult->resize(baseOffset + numArgs * rows.countSelected());

std::vector<BaseVector::CopyRange> ranges;
ranges.reserve(rows.end());
if (shouldCopyRanges(elementsResult->type())) {
std::vector<BaseVector::CopyRange> ranges;
ranges.reserve(rows.end());

vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;
ranges.push_back({row, offset, 1});
offset += numArgs;
});
vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;
ranges.push_back({row, offset, 1});
offset += numArgs;
});

elementsResult->copyRanges(args[0].get(), ranges);
elementsResult->copyRanges(args[0].get(), ranges);

for (int i = 1; i < numArgs; i++) {
for (auto& range : ranges) {
++range.targetIndex;
for (int i = 1; i < numArgs; i++) {
for (auto& range : ranges) {
++range.targetIndex;
}
elementsResult->copyRanges(args[i].get(), ranges);
}
} else {
SelectivityVector targetRows(elementsResult->size(), false);
std::vector<vector_size_t> toSourceRow(elementsResult->size());

vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
rawSizes[row] = numArgs;
rawOffsets[row] = offset;

targetRows.setValid(offset, true);
toSourceRow[offset] = row;

offset += numArgs;
});
targetRows.updateBounds();
elementsResult->copy(args[0].get(), targetRows, toSourceRow.data());

for (int i = 1; i < numArgs; i++) {
targetRows.clearAll();
vector_size_t offset = baseOffset;
rows.applyToSelected([&](vector_size_t row) {
targetRows.setValid(offset + i, true);
toSourceRow[offset + i] = row;
offset += numArgs;
});

targetRows.updateBounds();
elementsResult->copy(args[i].get(), targetRows, toSourceRow.data());
}
elementsResult->copyRanges(args[i].get(), ranges);
}
}
}
Expand All @@ -90,6 +121,30 @@ class ArrayConstructor : public exec::VectorFunction {
.build(),
};
}

private:
// Decides which copy API to use when filling the elements vector.
//
// BaseVector::copyRanges is faster for arrays and maps, while
// BaseVector::copy(source, rows, toSourceRow) is faster for primitive types
// and for rows made up only of such types.
//
// @param type Type of the elements vector.
// @return true if 'type' is an array or a map, or a row that contains an
// array or a map at any nesting level; false if 'type' is primitive or a row
// whose fields (recursively) are all primitive.
static bool shouldCopyRanges(const TypePtr& type) {
  if (type->isPrimitiveType()) {
    return false;
  }

  // Any non-primitive, non-row type is treated as array-like / map-like.
  if (!type->isRow()) {
    return true;
  }

  // A row needs copyRanges as soon as one field does.
  const auto& rowType = type->asRow();
  for (const auto& child : rowType.children()) {
    if (shouldCopyRanges(child)) {
      return true;
    }
  }
  return false;
}
};
} // namespace

Expand Down
94 changes: 94 additions & 0 deletions velox/functions/prestosql/benchmarks/ArrayConstructorBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/Benchmark.h>
#include <folly/init/Init.h>

#include "velox/benchmarks/ExpressionBenchmarkBuilder.h"
#include "velox/functions/lib/LambdaFunctionUtil.h"
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
#include "velox/functions/prestosql/ArrayFunctions.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"

using namespace facebook::velox;
using namespace facebook::velox::exec;
using namespace facebook::velox::functions;

int main(int argc, char** argv) {
folly::init(&argc, &argv);

functions::prestosql::registerArrayFunctions();

ExpressionBenchmarkBuilder benchmarkBuilder;

auto* pool = benchmarkBuilder.pool();
auto& vm = benchmarkBuilder.vectorMaker();

auto createSet =
[&](const TypePtr& type, bool withNulls, const VectorPtr& constantInput) {
VectorFuzzer::Options options;
options.vectorSize = 1'000;
options.nullRatio = withNulls ? 0.2 : 0.0;

VectorFuzzer fuzzer(options, pool);
std::vector<VectorPtr> columns;
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(fuzzer.fuzzFlat(type));
columns.push_back(
BaseVector::createNullConstant(type, options.vectorSize, pool));
columns.push_back(
BaseVector::wrapInConstant(options.vectorSize, 0, constantInput));

auto input = vm.rowVector({"c0", "c1", "c2", "n", "c"}, columns);

benchmarkBuilder
.addBenchmarkSet(
fmt::format(
"array_constructor_{}_{}",
mapTypeKindToName(type->kind()),
withNulls ? "nulls" : "nullfree"),
input)
.addExpression("1", "array_constructor(c0)")
.addExpression("2", "array_constructor(c0, c1)")
.addExpression("3", "array_constructor(c0, c1, c2)")
.addExpression("2_null", "array_constructor(c0, c1, n)")
.addExpression("2_const", "array_constructor(c0, c1, c)");
};

auto constantInteger = BaseVector::createConstant(INTEGER(), 11, 1, pool);
createSet(INTEGER(), true, constantInteger);
createSet(INTEGER(), false, constantInteger);

auto constantRow = vm.rowVector({
BaseVector::createConstant(INTEGER(), 11, 1, pool),
BaseVector::createConstant(DOUBLE(), 1.23, 1, pool),
});
createSet(ROW({INTEGER(), DOUBLE()}), true, constantRow);
createSet(ROW({INTEGER(), DOUBLE()}), false, constantRow);

auto constantArray = vm.arrayVector<int32_t>({{1, 2, 3, 4, 5}});
createSet(ARRAY(INTEGER()), true, constantArray);
createSet(ARRAY(INTEGER()), false, constantArray);

auto constantMap = vm.mapVector<int32_t, float>({{{1, 1.23}, {2, 2.34}}});
createSet(MAP(INTEGER(), REAL()), true, constantMap);
createSet(MAP(INTEGER(), REAL()), false, constantMap);

benchmarkBuilder.registerBenchmarks();

folly::runBenchmarks();
return 0;
}
35 changes: 20 additions & 15 deletions velox/vector/ComplexVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,17 +231,19 @@ void RowVector::copy(
[&](auto row) { rawMappedIndices[row] = indices[toSourceRow[row]]; });
}

auto baseSource = decodedSource.base()->as<RowVector>();
for (auto i = 0; i < childrenSize_; ++i) {
if (baseSource->childAt(i)) {
BaseVector::ensureWritable(
rows, type()->asRow().childAt(i), pool(), children_[i]);
children_[i]->copy(
baseSource->childAt(i)->loadedVector(),
nonNullRows,
rawMappedIndices ? rawMappedIndices : indices);
} else {
children_[i].reset();
if (source->typeKind() != TypeKind::UNKNOWN) {
auto baseSource = decodedSource.base()->as<RowVector>();
for (auto i = 0; i < childrenSize_; ++i) {
if (baseSource->childAt(i)) {
BaseVector::ensureWritable(
rows, type()->asRow().childAt(i), pool(), children_[i]);
children_[i]->copy(
baseSource->childAt(i)->loadedVector(),
nonNullRows,
rawMappedIndices ? rawMappedIndices : indices);
} else {
children_[i].reset();
}
}
}
}
Expand Down Expand Up @@ -319,10 +321,13 @@ void RowVector::copyRanges(
}
}
}
auto* rowSource = decoded.base()->as<RowVector>();
for (int i = 0; i < children_.size(); ++i) {
children_[i]->copyRanges(
rowSource->childAt(i)->loadedVector(), baseRanges);

if (source->typeKind() != TypeKind::UNKNOWN) {
auto* rowSource = decoded.base()->as<RowVector>();
for (int i = 0; i < children_.size(); ++i) {
children_[i]->copyRanges(
rowSource->childAt(i)->loadedVector(), baseRanges);
}
}
}
}
Expand Down
Loading

0 comments on commit 9a61532

Please sign in to comment.