diff --git a/CMakeLists.txt b/CMakeLists.txt index d5fab0c3edc3..7fdd6990eb07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,7 +175,7 @@ message("Setting CMAKE_CXX_FLAGS=${SCRIPT_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SCRIPT_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D USE_VELOX_COMMON_BASE") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D HAS_UNCAUGHT_EXCEPTIONS") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D HAS_UNCAUGHT_EXCEPTIONS -fPIC") # Under Ninja, we are able to designate certain targets large enough to require # restricted parallelism. diff --git a/velox/core/PlanNode.h b/velox/core/PlanNode.h index 3b9efd728cee..e514077a372d 100644 --- a/velox/core/PlanNode.h +++ b/velox/core/PlanNode.h @@ -18,7 +18,7 @@ #include "velox/connectors/Connector.h" #include "velox/core/Expressions.h" -#include "velox/vector/arrow/Bridge.h" +#include "velox/vector/arrow/c/Bridge.h" namespace facebook::velox::core { diff --git a/velox/exec/ArrowStream.h b/velox/exec/ArrowStream.h index 62783f56788d..43744b243f50 100644 --- a/velox/exec/ArrowStream.h +++ b/velox/exec/ArrowStream.h @@ -16,7 +16,7 @@ #include "velox/core/PlanNode.h" #include "velox/exec/Operator.h" -#include "velox/vector/arrow/Abi.h" +#include "velox/vector/arrow/c/abi.h" namespace facebook::velox::exec { diff --git a/velox/functions/prestosql/aggregates/AverageAggregate.cpp b/velox/functions/prestosql/aggregates/AverageAggregate.cpp index aa2059f476f1..de212de872cb 100644 --- a/velox/functions/prestosql/aggregates/AverageAggregate.cpp +++ b/velox/functions/prestosql/aggregates/AverageAggregate.cpp @@ -13,8 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "velox/exec/Aggregate.h" -#include "velox/expression/FunctionSignature.h" +#include "velox/functions/prestosql/aggregates/AverageAggregate.h" #include "velox/functions/prestosql/aggregates/AggregateNames.h" #include "velox/functions/prestosql/aggregates/AverageDecimalAccumulator.h" #include "velox/vector/ComplexVector.h" diff --git a/velox/functions/prestosql/aggregates/AverageAggregate.h b/velox/functions/prestosql/aggregates/AverageAggregate.h new file mode 100644 index 000000000000..490dde456e3d --- /dev/null +++ b/velox/functions/prestosql/aggregates/AverageAggregate.h @@ -0,0 +1,347 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/exec/Aggregate.h" +#include "velox/expression/FunctionSignature.h" +#include "velox/functions/prestosql/aggregates/AggregateNames.h" +#include "velox/vector/ComplexVector.h" +#include "velox/vector/DecodedVector.h" +#include "velox/vector/FlatVector.h" + +namespace facebook::velox::aggregate { + +struct SumCount { + double sum{0}; + int64_t count{0}; +}; + +// Partial aggregation produces a pair of sum and count. +// Final aggregation takes a pair of sum and count and returns a real for real +// input types and double for other input types. +// T is the input type for partial aggregation. Not used for final aggregation. +template +class AverageAggregate : public exec::Aggregate { + public: + explicit AverageAggregate(TypePtr resultType) : exec::Aggregate(resultType) {} + + int32_t accumulatorFixedWidthSize() const override { + return sizeof(SumCount); + } + + void initializeNewGroups( + char** groups, + folly::Range indices) override { + setAllNulls(groups, indices); + for (auto i : indices) { + new (groups[i] + offset_) SumCount(); + } + } + + void finalize(char** /* unused */, int32_t /* unused */) override {} + + void extractValues(char** groups, int32_t numGroups, VectorPtr* result) + override { + // Real input type in Presto has special case and returns REAL, not DOUBLE. + if (resultType_->isDouble()) { + extractValuesImpl(groups, numGroups, result); + } else { + extractValuesImpl(groups, numGroups, result); + } + } + + void extractAccumulators(char** groups, int32_t numGroups, VectorPtr* result) + override { + auto rowVector = (*result)->as(); + auto sumVector = rowVector->childAt(0)->asFlatVector(); + auto countVector = rowVector->childAt(1)->asFlatVector(); + + rowVector->resize(numGroups); + sumVector->resize(numGroups); + countVector->resize(numGroups); + uint64_t* rawNulls = getRawNulls(rowVector); + + int64_t* rawCounts = countVector->mutableRawValues(); + double* rawSums = sumVector->mutableRawValues(); + for (auto i = 0; i < numGroups; ++i) { + char* group = groups[i]; + if (isNull(group)) { + rowVector->setNull(i, true); + } else { + clearNull(rawNulls, i); + auto* sumCount = accumulator(group); + rawCounts[i] = sumCount->count; + rawSums[i] = sumCount->sum; + } + } + } + + void addRawInput( + char** groups, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + decodedRaw_.decode(*args[0], rows); + if (decodedRaw_.isConstantMapping()) { + if (!decodedRaw_.isNullAt(0)) { + auto value = decodedRaw_.valueAt(0); + rows.applyToSelected( + [&](vector_size_t i) { updateNonNullValue(groups[i], value); }); + } + } else if (decodedRaw_.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (decodedRaw_.isNullAt(i)) { + return; + } + updateNonNullValue(groups[i], decodedRaw_.valueAt(i)); + }); + } else if (!exec::Aggregate::numNulls_ && decodedRaw_.isIdentityMapping()) { + auto data = decodedRaw_.data(); + rows.applyToSelected([&](vector_size_t i) { + updateNonNullValue(groups[i], data[i]); + }); + } else { + rows.applyToSelected([&](vector_size_t i) { + updateNonNullValue(groups[i], decodedRaw_.valueAt(i)); + }); + } + } + + void addSingleGroupRawInput( + char* group, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + decodedRaw_.decode(*args[0], rows); + + if (decodedRaw_.isConstantMapping()) { + if (!decodedRaw_.isNullAt(0)) { + const T value = decodedRaw_.valueAt(0); + const auto numRows = rows.countSelected(); + updateNonNullValue(group, numRows, value * numRows); + } + } else if (decodedRaw_.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (!decodedRaw_.isNullAt(i)) { + updateNonNullValue(group, decodedRaw_.valueAt(i)); + } + }); + } else if (!exec::Aggregate::numNulls_ && decodedRaw_.isIdentityMapping()) { + const T* data = decodedRaw_.data(); + double totalSum = 0; + rows.applyToSelected([&](vector_size_t i) { totalSum += data[i]; }); + updateNonNullValue(group, rows.countSelected(), totalSum); + } else { + double totalSum = 0; + rows.applyToSelected( + [&](vector_size_t i) { totalSum += decodedRaw_.valueAt(i); }); + updateNonNullValue(group, rows.countSelected(), totalSum); + } + } + + void addIntermediateResults( + char** groups, + const SelectivityVector& rows, + const std::vector& args, + bool /* mayPushdown */) override { + decodedPartial_.decode(*args[0], rows); + auto baseRowVector = dynamic_cast(decodedPartial_.base()); + auto baseSumVector = baseRowVector->childAt(0)->as>(); + auto baseCountVector = + baseRowVector->childAt(1)->as>(); + + if (decodedPartial_.isConstantMapping()) { + if (!decodedPartial_.isNullAt(0)) { + auto decodedIndex = decodedPartial_.index(0); + auto count = baseCountVector->valueAt(decodedIndex); + auto sum = baseSumVector->valueAt(decodedIndex); + rows.applyToSelected([&](vector_size_t i) { + updateNonNullValue(groups[i], count, sum); + }); + } + } else if (decodedPartial_.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (decodedPartial_.isNullAt(i)) { + return; + } + auto decodedIndex = decodedPartial_.index(i); + updateNonNullValue( + groups[i], + baseCountVector->valueAt(decodedIndex), + baseSumVector->valueAt(decodedIndex)); + }); + } else { + rows.applyToSelected([&](vector_size_t i) { + auto decodedIndex = decodedPartial_.index(i); + updateNonNullValue( + groups[i], + baseCountVector->valueAt(decodedIndex), + baseSumVector->valueAt(decodedIndex)); + }); + } + } + + void addSingleGroupIntermediateResults( + char* group, + const SelectivityVector& rows, + const std::vector& args, + bool /* mayPushdown */) override { + decodedPartial_.decode(*args[0], rows); + auto baseRowVector = dynamic_cast(decodedPartial_.base()); + auto baseSumVector = baseRowVector->childAt(0)->as>(); + auto baseCountVector = + baseRowVector->childAt(1)->as>(); + + if (decodedPartial_.isConstantMapping()) { + if (!decodedPartial_.isNullAt(0)) { + auto decodedIndex = decodedPartial_.index(0); + const auto numRows = rows.countSelected(); + auto totalCount = baseCountVector->valueAt(decodedIndex) * numRows; + auto totalSum = baseSumVector->valueAt(decodedIndex) * numRows; + updateNonNullValue(group, totalCount, totalSum); + } + } else if (decodedPartial_.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (!decodedPartial_.isNullAt(i)) { + auto decodedIndex = decodedPartial_.index(i); + updateNonNullValue( + group, + baseCountVector->valueAt(decodedIndex), + baseSumVector->valueAt(decodedIndex)); + } + }); + } else { + double totalSum = 0; + int64_t totalCount = 0; + rows.applyToSelected([&](vector_size_t i) { + auto decodedIndex = decodedPartial_.index(i); + totalCount += baseCountVector->valueAt(decodedIndex); + totalSum += baseSumVector->valueAt(decodedIndex); + }); + updateNonNullValue(group, totalCount, totalSum); + } + } + + private: + // partial + template + inline void updateNonNullValue(char* group, T value) { + if constexpr (tableHasNulls) { + exec::Aggregate::clearNull(group); + } + accumulator(group)->sum += value; + accumulator(group)->count += 1; + } + + template + inline void updateNonNullValue(char* group, int64_t count, double sum) { + if constexpr (tableHasNulls) { + exec::Aggregate::clearNull(group); + } + accumulator(group)->sum += sum; + accumulator(group)->count += count; + } + + inline SumCount* accumulator(char* group) { + return exec::Aggregate::value(group); + } + + template + void extractValuesImpl(char** groups, int32_t numGroups, VectorPtr* result) { + auto vector = (*result)->as>(); + VELOX_CHECK(vector); + vector->resize(numGroups); + uint64_t* rawNulls = getRawNulls(vector); + + TResult* rawValues = vector->mutableRawValues(); + for (int32_t i = 0; i < numGroups; ++i) { + char* group = groups[i]; + if (isNull(group)) { + vector->setNull(i, true); + } else { + clearNull(rawNulls, i); + auto* sumCount = accumulator(group); + rawValues[i] = (TResult)sumCount->sum / sumCount->count; + } + } + } + + DecodedVector decodedRaw_; + DecodedVector decodedPartial_; +}; + +void checkSumCountRowType(TypePtr type, const std::string& errorMessage) { + VELOX_CHECK_EQ(type->kind(), TypeKind::ROW, "{}", errorMessage); + VELOX_CHECK_EQ( + type->childAt(0)->kind(), TypeKind::DOUBLE, "{}", errorMessage); + VELOX_CHECK_EQ( + type->childAt(1)->kind(), TypeKind::BIGINT, "{}", errorMessage); +} + +bool registerAverageAggregate(const std::string& name) { + std::vector> signatures; + + for (const auto& inputType : {"smallint", "integer", "bigint", "double"}) { + signatures.push_back(exec::AggregateFunctionSignatureBuilder() + .returnType("double") + .intermediateType("row(double,bigint)") + .argumentType(inputType) + .build()); + } + // Real input type in Presto has special case and returns REAL, not DOUBLE. + signatures.push_back(exec::AggregateFunctionSignatureBuilder() + .returnType("real") + .intermediateType("row(double,bigint)") + .argumentType("real") + .build()); + + exec::registerAggregateFunction( + name, + std::move(signatures), + [name]( + core::AggregationNode::Step step, + const std::vector& argTypes, + const TypePtr& resultType) -> std::unique_ptr { + VELOX_CHECK_LE( + argTypes.size(), 1, "{} takes at most one argument", name); + auto inputType = argTypes[0]; + if (exec::isRawInput(step)) { + switch (inputType->kind()) { + case TypeKind::SMALLINT: + return std::make_unique>(resultType); + case TypeKind::INTEGER: + return std::make_unique>(resultType); + case TypeKind::BIGINT: + return std::make_unique>(resultType); + case TypeKind::REAL: + return std::make_unique>(resultType); + case TypeKind::DOUBLE: + return std::make_unique>(resultType); + default: + VELOX_FAIL( + "Unknown input type for {} aggregation {}", + name, + inputType->kindName()); + } + } else { + checkSumCountRowType( + inputType, + "Input type for final aggregation must be (sum:double, count:bigint) struct"); + return std::make_unique>(resultType); + } + }); + return true; +} + +} // namespace facebook::velox::aggregate diff --git a/velox/functions/prestosql/aggregates/CMakeLists.txt b/velox/functions/prestosql/aggregates/CMakeLists.txt index d002393ac35e..30872f83a442 100644 --- a/velox/functions/prestosql/aggregates/CMakeLists.txt +++ b/velox/functions/prestosql/aggregates/CMakeLists.txt @@ -21,6 +21,7 @@ add_library( ArbitraryAggregate.cpp ArrayAggAggregate.cpp AverageAggregate.cpp + AverageAggregate.h BitwiseAggregates.cpp BoolAggregates.cpp CountIfAggregate.cpp @@ -34,6 +35,7 @@ add_library( MinMaxAggregates.cpp MinMaxByAggregates.cpp CountAggregate.cpp + CountAggregate.h PrestoHasher.cpp SingleValueAccumulator.cpp SumAggregate.cpp diff --git a/velox/functions/prestosql/aggregates/CountAggregate.cpp b/velox/functions/prestosql/aggregates/CountAggregate.cpp index 4cb5528d260b..9f8d203c217f 100644 --- a/velox/functions/prestosql/aggregates/CountAggregate.cpp +++ b/velox/functions/prestosql/aggregates/CountAggregate.cpp @@ -13,10 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "velox/common/base/Exceptions.h" -#include "velox/expression/FunctionSignature.h" +#include "velox/functions/prestosql/aggregates/CountAggregate.h" #include "velox/functions/prestosql/aggregates/AggregateNames.h" -#include "velox/functions/prestosql/aggregates/SumAggregate.h" namespace facebook::velox::aggregate::prestosql { diff --git a/velox/functions/prestosql/aggregates/CountAggregate.h b/velox/functions/prestosql/aggregates/CountAggregate.h new file mode 100644 index 000000000000..9a54c894cf02 --- /dev/null +++ b/velox/functions/prestosql/aggregates/CountAggregate.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "velox/common/base/Exceptions.h" +#include "velox/expression/FunctionSignature.h" +#include "velox/functions/prestosql/aggregates/AggregateNames.h" +#include "velox/functions/prestosql/aggregates/SumAggregate.h" + +namespace facebook::velox::aggregate { + +class CountAggregate : public SimpleNumericAggregate { + using BaseAggregate = SimpleNumericAggregate; + + public: + explicit CountAggregate() : BaseAggregate(BIGINT()) {} + + int32_t accumulatorFixedWidthSize() const override { + return sizeof(int64_t); + } + + void initializeNewGroups( + char** groups, + folly::Range indices) override { + for (auto i : indices) { + // result of count is never null + *value(groups[i]) = (int64_t)0; + } + } + + void extractValues(char** groups, int32_t numGroups, VectorPtr* result) + override { + BaseAggregate::doExtractValues(groups, numGroups, result, [&](char* group) { + return *value(group); + }); + } + + void addRawInput( + char** groups, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + if (args.empty()) { + rows.applyToSelected([&](vector_size_t i) { addToGroup(groups[i], 1); }); + return; + } + + DecodedVector decoded(*args[0], rows); + if (decoded.isConstantMapping()) { + if (!decoded.isNullAt(0)) { + rows.applyToSelected( + [&](vector_size_t i) { addToGroup(groups[i], 1); }); + } + } else if (decoded.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (decoded.isNullAt(i)) { + return; + } + addToGroup(groups[i], 1); + }); + } else { + rows.applyToSelected([&](vector_size_t i) { addToGroup(groups[i], 1); }); + } + } + + void addIntermediateResults( + char** groups, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + decodedIntermediate_.decode(*args[0], rows); + rows.applyToSelected([&](vector_size_t i) { + addToGroup(groups[i], decodedIntermediate_.valueAt(i)); + }); + } + + void addSingleGroupRawInput( + char* group, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + if (args.empty()) { + addToGroup(group, rows.size()); + return; + } + + DecodedVector decoded(*args[0], rows); + if (decoded.isConstantMapping()) { + if (!decoded.isNullAt(0)) { + addToGroup(group, rows.size()); + } + } else if (decoded.mayHaveNulls()) { + int64_t nonNullCount = 0; + rows.applyToSelected([&](vector_size_t i) { + if (!decoded.isNullAt(i)) { + ++nonNullCount; + } + }); + addToGroup(group, nonNullCount); + } else { + addToGroup(group, rows.size()); + } + } + + void addSingleGroupIntermediateResults( + char* group, + const SelectivityVector& rows, + const std::vector& args, + bool /*mayPushdown*/) override { + decodedIntermediate_.decode(*args[0], rows); + + int64_t count = 0; + if (decodedIntermediate_.mayHaveNulls()) { + rows.applyToSelected([&](vector_size_t i) { + if (!decodedIntermediate_.isNullAt(i)) { + count += decodedIntermediate_.valueAt(i); + } + }); + } else { + rows.applyToSelected([&](vector_size_t i) { + count += decodedIntermediate_.valueAt(i); + }); + } + + addToGroup(group, count); + } + + private: + inline void addToGroup(char* group, int64_t count) { + *value(group) += count; + } + + DecodedVector decodedIntermediate_; +}; + +bool registerCountAggregate(const std::string& name) { + std::vector> signatures{ + exec::AggregateFunctionSignatureBuilder() + .returnType("bigint") + .intermediateType("bigint") + .build(), + exec::AggregateFunctionSignatureBuilder() + .typeVariable("T") + .returnType("bigint") + .intermediateType("bigint") + .argumentType("T") + .build(), + }; + + exec::registerAggregateFunction( + name, + std::move(signatures), + [name]( + core::AggregationNode::Step step, + const std::vector& argTypes, + const TypePtr& + /*resultType*/) -> std::unique_ptr { + VELOX_CHECK_LE( + argTypes.size(), 1, "{} takes at most one argument", name); + return std::make_unique(); + }); + return true; +} + +} // namespace facebook::velox::aggregate diff --git a/velox/vector/arrow/CMakeLists.txt b/velox/vector/arrow/CMakeLists.txt index 31038b1a41f5..d1258790f5b9 100644 --- a/velox/vector/arrow/CMakeLists.txt +++ b/velox/vector/arrow/CMakeLists.txt @@ -11,11 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -add_library(velox_arrow_bridge Bridge.cpp) -target_link_libraries(velox_arrow_bridge velox_memory velox_type velox_buffer - velox_exception) - -if(VELOX_BUILD_TESTING AND VELOX_ENABLE_ARROW) - add_subdirectory(tests) -endif() +add_subdirectory(c) diff --git a/velox/vector/arrow/Bridge.cpp b/velox/vector/arrow/c/Bridge.cpp similarity index 99% rename from velox/vector/arrow/Bridge.cpp rename to velox/vector/arrow/c/Bridge.cpp index 4953293dcef8..a41df4c71272 100644 --- a/velox/vector/arrow/Bridge.cpp +++ b/velox/vector/arrow/c/Bridge.cpp @@ -14,8 +14,7 @@ * limitations under the License. */ -#include "velox/vector/arrow/Bridge.h" - +#include "velox/vector/arrow/c/Bridge.h" #include "velox/buffer/Buffer.h" #include "velox/common/base/BitUtil.h" #include "velox/common/base/CheckedArithmetic.h" diff --git a/velox/vector/arrow/Bridge.h b/velox/vector/arrow/c/Bridge.h similarity index 85% rename from velox/vector/arrow/Bridge.h rename to velox/vector/arrow/c/Bridge.h index f49603474c19..a5e00a59125d 100644 --- a/velox/vector/arrow/Bridge.h +++ b/velox/vector/arrow/c/Bridge.h @@ -18,17 +18,14 @@ #include "velox/common/memory/Memory.h" #include "velox/vector/BaseVector.h" +#include "velox/vector/arrow/c/abi.h" -/// These 2 definitions should be included by user from either -/// 1. or -/// 2. "velox/vector/arrow/Abi.h" struct ArrowArray; struct ArrowSchema; namespace facebook::velox { - -/// Export a generic Velox Vector to an ArrowArray, as defined by Arrow's C data -/// interface: +/// Export a generic Velox Vector to an ArrowArray, as defined by Arrow's C +/// data interface: /// /// https://arrow.apache.org/docs/format/CDataInterface.html /// @@ -36,13 +33,13 @@ namespace facebook::velox { /// heap or stack), and after usage, the standard REQUIRES the client to call /// the release() function (or memory will leak). /// -/// After exporting, the ArrowArray will hold ownership to the underlying Vector -/// being referenced, so the consumer does not need to explicitly hold on to the -/// input Vector shared_ptr. +/// After exporting, the ArrowArray will hold ownership to the underlying +/// Vector being referenced, so the consumer does not need to explicitly hold +/// on to the input Vector shared_ptr. /// /// The function takes a memory pool where allocations will be made (in cases -/// where the conversion is not zero-copy, e.g. for strings) and throws in case -/// the conversion is not implemented yet. +/// where the conversion is not zero-copy, e.g. for strings) and throws in +/// case the conversion is not implemented yet. /// /// Example usage: /// @@ -83,10 +80,10 @@ void exportToArrow(const VectorPtr&, ArrowSchema&); /// Import an ArrowSchema into a Velox Type object. /// -/// This function does the exact opposite of the function above. TypePtr carries -/// all buffers they need to represent types, so after this function returns, -/// the client is free to release any buffers associated with the input -/// ArrowSchema object. +/// This function does the exact opposite of the function above. TypePtr +/// carries all buffers they need to represent types, so after this function +/// returns, the client is free to release any buffers associated with the +/// input ArrowSchema object. /// /// The function throws in case there was no valid conversion available. /// @@ -107,8 +104,8 @@ TypePtr importFromArrow(const ArrowSchema& arrowSchema); /// both (buffer and type). A memory pool is also required, since all vectors /// carry a pointer to it, but not really used in most cases - unless the /// conversion itself requires a new allocation. In most cases no new -/// allocations are required, unless for arrays of varchars (or varbinaries) and -/// complex types written out of order. +/// allocations are required, unless for arrays of varchars (or varbinaries) +/// and complex types written out of order. /// /// The new Velox vector returned contains only references to the underlying /// buffers, so it's the client's responsibility to ensure the buffer's @@ -134,14 +131,14 @@ VectorPtr importFromArrowAsViewer( /// ownership over the input data. /// /// Similar to importFromArrowAsViewer but the ownership of arrowSchema and -/// arrowArray will be taken over. Specifically, the returned Vector will own a -/// copy of arrowSchema and arrowArray. -/// -/// The inputs arrowSchema and arrowArray will be marked as released by setting -/// their release callback to nullptr -/// (https://arrow.apache.org/docs/format/CDataInterface.html). Afterwards, the -/// returned Vector will be responsible for calling the release callbacks when -/// destructed. +/// arrowArray will be taken over. Specifically, the returned Vector will own +/// a copy of arrowSchema and arrowArray. +/// +/// The inputs arrowSchema and arrowArray will be marked as released by +/// setting their release callback to nullptr +/// (https://arrow.apache.org/docs/format/CDataInterface.html). Afterwards, +/// the returned Vector will be responsible for calling the release callbacks +/// when destructed. VectorPtr importFromArrowAsOwner( ArrowSchema& arrowSchema, ArrowArray& arrowArray, diff --git a/velox/vector/arrow/c/CMakeLists.txt b/velox/vector/arrow/c/CMakeLists.txt new file mode 100644 index 000000000000..a4975d6f570c --- /dev/null +++ b/velox/vector/arrow/c/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +add_library(velox_arrow_bridge Bridge.cpp) + +target_link_libraries(velox_arrow_bridge velox_memory velox_type velox_buffer + velox_exception) + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/velox/vector/arrow/Abi.h b/velox/vector/arrow/c/abi.h similarity index 100% rename from velox/vector/arrow/Abi.h rename to velox/vector/arrow/c/abi.h diff --git a/velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp b/velox/vector/arrow/c/tests/ArrowBridgeArrayTest.cpp similarity index 99% rename from velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp rename to velox/vector/arrow/c/tests/ArrowBridgeArrayTest.cpp index c219210570a1..2709927f0f56 100644 --- a/velox/vector/arrow/tests/ArrowBridgeArrayTest.cpp +++ b/velox/vector/arrow/c/tests/ArrowBridgeArrayTest.cpp @@ -22,7 +22,7 @@ #include "velox/common/base/Nulls.h" #include "velox/core/QueryCtx.h" -#include "velox/vector/arrow/Bridge.h" +#include "velox/vector/arrow/c/Bridge.h" #include "velox/vector/tests/utils/VectorMaker.h" namespace { diff --git a/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp b/velox/vector/arrow/c/tests/ArrowBridgeSchemaTest.cpp similarity index 99% rename from velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp rename to velox/vector/arrow/c/tests/ArrowBridgeSchemaTest.cpp index 3e1a3080c78f..ce81bedd4c7a 100644 --- a/velox/vector/arrow/tests/ArrowBridgeSchemaTest.cpp +++ b/velox/vector/arrow/c/tests/ArrowBridgeSchemaTest.cpp @@ -19,8 +19,9 @@ #include #include +#include "velox/common/base/Nulls.h" #include "velox/common/base/tests/GTestUtils.h" -#include "velox/vector/arrow/Bridge.h" +#include "velox/vector/arrow/c/Bridge.h" namespace { diff --git a/velox/vector/arrow/tests/CMakeLists.txt b/velox/vector/arrow/c/tests/CMakeLists.txt similarity index 100% rename from velox/vector/arrow/tests/CMakeLists.txt rename to velox/vector/arrow/c/tests/CMakeLists.txt