diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index 05fe4cb9f22a..10b9a78e6547 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -141,6 +141,12 @@ void FlatVector::copyValuesAndNulls( const BaseVector* source, const SelectivityVector& rows, const vector_size_t* toSourceRow) { + if (source->typeKind() == TypeKind::UNKNOWN) { + auto* rawNulls = BaseVector::mutableRawNulls(); + rows.applyToSelected([&](auto row) { bits::setNull(rawNulls, row, true); }); + return; + } + source = source->loadedVector(); VELOX_CHECK( BaseVector::compatibleKind(BaseVector::typeKind(), source->typeKind())); @@ -158,35 +164,56 @@ void FlatVector::copyValuesAndNulls( } if (source->isFlatEncoding()) { - auto* sourceValues = source->typeKind() != TypeKind::UNKNOWN - ? source->asUnchecked>()->rawValues() - : nullptr; - if (toSourceRow) { - rows.applyToSelected([&](auto row) { - auto sourceRow = toSourceRow[row]; - if (sourceValues) { - rawValues_[row] = sourceValues[sourceRow]; - } - if (rawNulls) { - bits::setNull( - rawNulls, - row, - sourceNulls && bits::isBitNull(sourceNulls, sourceRow)); - } - }); + auto* flatSource = source->asUnchecked>(); + if (flatSource->values() == nullptr) { + // All source values are null. + rows.applyToSelected( + [&](auto row) { bits::setNull(rawNulls, row, true); }); + return; + } + + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + auto* sourceValues = flatSource->template rawValues(); + if (toSourceRow) { + rows.applyToSelected([&](auto row) { + int32_t sourceRow = toSourceRow[row]; + bits::setBit(rawValues, row, bits::isBitSet(sourceValues, sourceRow)); + }); + } else { + rows.applyToSelected([&](auto row) { + bits::setBit(rawValues, row, bits::isBitSet(sourceValues, row)); + }); + } } else { - rows.applyToSelected([&](vector_size_t row) { - if (row >= source->size()) { - return; - } - if (sourceValues) { - rawValues_[row] = sourceValues[row]; - } - if (rawNulls) { - bits::setNull( - rawNulls, row, sourceNulls && bits::isBitNull(sourceNulls, row)); + auto* sourceValues = flatSource->rawValues(); + if (toSourceRow) { + rows.applyToSelected([&](auto row) { + rawValues_[row] = sourceValues[toSourceRow[row]]; + }); + } else { + rows.applyToSelected( + [&](auto row) { rawValues_[row] = sourceValues[row]; }); + } + } + + if (rawNulls) { + if (!sourceNulls) { + rows.applyToSelected( + [&](vector_size_t row) { bits::setNull(rawNulls, row, false); }); + } else { + if (toSourceRow) { + rows.applyToSelected([&](auto row) { + auto sourceRow = toSourceRow[row]; + bits::setNull( + rawNulls, row, bits::isBitNull(sourceNulls, sourceRow)); + }); + } else { + rows.applyToSelected([&](vector_size_t row) { + bits::setNull(rawNulls, row, bits::isBitNull(sourceNulls, row)); + }); } - }); + } } } else if (source->isConstantEncoding()) { if (source->isNullAt(0)) { @@ -195,18 +222,32 @@ void FlatVector::copyValuesAndNulls( } auto constant = source->asUnchecked>(); T value = constant->valueAt(0); - rows.applyToSelected([&](int32_t row) { rawValues_[row] = value; }); + if constexpr (std::is_same_v) { + auto range = rows.asRange(); + auto* rawValues = reinterpret_cast(rawValues_); + if (value) { + bits::orBits(rawValues, range.bits(), range.begin(), range.end()); + } else { + bits::andWithNegatedBits( + rawValues, range.bits(), range.begin(), range.end()); + } + } else { + rows.applyToSelected([&](int32_t row) { rawValues_[row] = value; }); + } + rows.clearNulls(rawNulls); } else { - auto sourceVector = source->typeKind() != TypeKind::UNKNOWN - ? source->asUnchecked>() - : nullptr; + auto sourceVector = source->asUnchecked>(); rows.applyToSelected([&](auto row) { auto sourceRow = toSourceRow ? toSourceRow[row] : row; if (!source->isNullAt(sourceRow)) { - if (sourceVector) { + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + bits::setBit(rawValues, row, sourceVector->valueAt(sourceRow)); + } else { rawValues_[row] = sourceVector->valueAt(sourceRow); } + if (rawNulls) { bits::clearNull(rawNulls, row); } @@ -223,9 +264,18 @@ void FlatVector::copyValuesAndNulls( vector_size_t targetIndex, vector_size_t sourceIndex, vector_size_t count) { + if (source->typeKind() == TypeKind::UNKNOWN) { + auto* rawNulls = BaseVector::mutableRawNulls(); + for (auto i = 0; i < count; ++i) { + bits::setNull(rawNulls, targetIndex + i, true); + } + return; + } + if (count == 0) { return; } + source = source->loadedVector(); VELOX_CHECK( BaseVector::compatibleKind(BaseVector::typeKind(), source->typeKind())); @@ -244,26 +294,34 @@ void FlatVector::copyValuesAndNulls( } if (source->isFlatEncoding()) { - if (!source->values() || source->values()->size() == 0) { - // The vector must have all-null values. - VELOX_CHECK_EQ( - BaseVector::countNulls(source->nulls(), 0, source->size()), - source->size()); - } else if (source->typeKind() != TypeKind::UNKNOWN) { - auto flat = source->asUnchecked>(); + auto* flatSource = source->asUnchecked>(); + if (flatSource->values() == nullptr) { + // All source values are null. + for (auto i = 0; i < count; ++i) { + bits::setNull(rawNulls, targetIndex + i, true); + } + return; + } + + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + auto* sourceValues = flatSource->template rawValues(); + bits::copyBits(sourceValues, sourceIndex, rawValues, targetIndex, count); + } else { + const T* srcValues = flatSource->rawValues(); if (Buffer::is_pod_like_v) { memcpy( &rawValues_[targetIndex], - &flat->rawValues()[sourceIndex], + &srcValues[sourceIndex], count * sizeof(T)); } else { - const T* srcValues = flat->rawValues(); std::copy( srcValues + sourceIndex, srcValues + sourceIndex + count, rawValues_ + targetIndex); } } + if (rawNulls) { if (sourceNulls) { bits::copyBits(sourceNulls, sourceIndex, rawNulls, targetIndex, count); @@ -279,9 +337,16 @@ void FlatVector::copyValuesAndNulls( } auto constant = source->asUnchecked>(); T value = constant->valueAt(0); - for (auto row = targetIndex; row < targetIndex + count; ++row) { - rawValues_[row] = value; + + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + bits::fillBits(rawValues, targetIndex, targetIndex + count, value); + } else { + for (auto row = targetIndex; row < targetIndex + count; ++row) { + rawValues_[row] = value; + } } + if (rawNulls) { bits::fillBits( rawNulls, targetIndex, targetIndex + count, bits::kNotNull); @@ -290,7 +355,16 @@ void FlatVector::copyValuesAndNulls( auto sourceVector = source->asUnchecked>(); for (int32_t i = 0; i < count; ++i) { if (!source->isNullAt(sourceIndex + i)) { - rawValues_[targetIndex + i] = sourceVector->valueAt(sourceIndex + i); + if constexpr (std::is_same_v) { + auto* rawValues = reinterpret_cast(rawValues_); + bits::setBit( + rawValues, + targetIndex + i, + sourceVector->valueAt(sourceIndex + i)); + } else { + rawValues_[targetIndex + i] = sourceVector->valueAt(sourceIndex + i); + } + if (rawNulls) { bits::clearNull(rawNulls, targetIndex + i); } diff --git a/velox/vector/FlatVector.cpp b/velox/vector/FlatVector.cpp index d3cafc598401..e4ccdfcee62e 100644 --- a/velox/vector/FlatVector.cpp +++ b/velox/vector/FlatVector.cpp @@ -46,164 +46,6 @@ void FlatVector::set(vector_size_t idx, bool value) { bits::setBit(reinterpret_cast(rawValues_), idx, value); } -template <> -void FlatVector::copyValuesAndNulls( - const BaseVector* source, - const SelectivityVector& rows, - const vector_size_t* toSourceRow) { - if (source->typeKind() == TypeKind::UNKNOWN) { - rows.applyToSelected([&](auto row) { setNull(row, true); }); - return; - } - - source = source->loadedVector(); - VELOX_CHECK( - BaseVector::compatibleKind(BaseVector::typeKind(), source->typeKind())); - VELOX_CHECK(BaseVector::length_ >= rows.end()); - const uint64_t* sourceNulls = source->rawNulls(); - uint64_t* rawNulls = const_cast(BaseVector::rawNulls_); - if (source->mayHaveNulls()) { - rawNulls = BaseVector::mutableRawNulls(); - } - uint64_t* rawValues = reinterpret_cast(rawValues_); - if (source->isFlatEncoding()) { - auto flat = source->asUnchecked>(); - auto* sourceValues = source->typeKind() != TypeKind::UNKNOWN - ? flat->rawValues() - : nullptr; - if (!sourceValues) { - // All rows in source vector are null. - rows.applyToSelected( - [&](auto row) { bits::setNull(rawNulls, row, true); }); - } else { - if (toSourceRow) { - rows.applyToSelected([&](auto row) { - int32_t sourceRow = toSourceRow[row]; - if (sourceValues) { - bits::setBit( - rawValues, row, bits::isBitSet(sourceValues, sourceRow)); - } - if (rawNulls) { - bits::setNull( - rawNulls, - row, - sourceNulls && bits::isBitNull(sourceNulls, sourceRow)); - } - }); - } else { - rows.applyToSelected([&](auto row) { - if (sourceValues) { - bits::setBit(rawValues, row, bits::isBitSet(sourceValues, row)); - } - if (rawNulls) { - bits::setNull( - rawNulls, - row, - sourceNulls && bits::isBitNull(sourceNulls, row)); - } - }); - } - } - } else if (source->isConstantEncoding()) { - auto constant = source->asUnchecked>(); - if (constant->isNullAt(0)) { - addNulls(nullptr, rows); - return; - } - bool value = constant->valueAt(0); - auto range = rows.asRange(); - if (value) { - bits::orBits(rawValues, range.bits(), range.begin(), range.end()); - } else { - bits::andWithNegatedBits( - rawValues, range.bits(), range.begin(), range.end()); - } - rows.clearNulls(rawNulls); - } else { - auto sourceVector = source->asUnchecked>(); - rows.applyToSelected([&](auto row) { - int32_t sourceRow = toSourceRow ? toSourceRow[row] : row; - if (!source->isNullAt(sourceRow)) { - bits::setBit(rawValues, row, sourceVector->valueAt(sourceRow)); - if (rawNulls) { - bits::clearNull(rawNulls, row); - } - } else { - bits::setNull(rawNulls, row); - } - }); - } -} - -template <> -void FlatVector::copyValuesAndNulls( - const BaseVector* source, - vector_size_t targetIndex, - vector_size_t sourceIndex, - vector_size_t count) { - if (count == 0) { - return; - } - source = source->loadedVector(); - VELOX_CHECK( - BaseVector::compatibleKind(BaseVector::typeKind(), source->typeKind())); - VELOX_CHECK(source->size() >= sourceIndex + count); - VELOX_CHECK(BaseVector::length_ >= targetIndex + count); - - const uint64_t* sourceNulls = source->rawNulls(); - auto rawValues = reinterpret_cast(rawValues_); - uint64_t* rawNulls = const_cast(BaseVector::rawNulls_); - if (source->mayHaveNulls()) { - rawNulls = BaseVector::mutableRawNulls(); - } - if (source->isFlatEncoding()) { - if (source->typeKind() != TypeKind::UNKNOWN) { - auto* sourceValues = - source->asUnchecked>()->rawValues(); - bits::copyBits(sourceValues, sourceIndex, rawValues, targetIndex, count); - } - if (rawNulls) { - if (sourceNulls) { - bits::copyBits(sourceNulls, sourceIndex, rawNulls, targetIndex, count); - } else { - bits::fillBits( - rawNulls, targetIndex, targetIndex + count, bits::kNotNull); - } - } - } else if (source->isConstantEncoding()) { - auto constant = source->asUnchecked>(); - if (constant->isNullAt(0)) { - bits::fillBits(rawNulls, targetIndex, targetIndex + count, bits::kNull); - return; - } - bool value = constant->valueAt(0); - bits::fillBits(rawValues, targetIndex, targetIndex + count, value); - if (rawNulls) { - bits::fillBits( - rawNulls, targetIndex, targetIndex + count, bits::kNotNull); - } - } else { - auto sourceVector = source->typeKind() != TypeKind::UNKNOWN - ? source->asUnchecked>() - : nullptr; - for (int32_t i = 0; i < count; ++i) { - if (!source->isNullAt(sourceIndex + i)) { - if (sourceVector) { - bits::setBit( - rawValues, - targetIndex + i, - sourceVector->valueAt(sourceIndex + i)); - } - if (rawNulls) { - bits::clearNull(rawNulls, targetIndex + i); - } - } else { - bits::setNull(rawNulls, targetIndex + i); - } - } - } -} - template <> Buffer* FlatVector::getBufferWithSpace(vector_size_t size) { VELOX_DCHECK_GE(stringBuffers_.size(), stringBufferSet_.size()); diff --git a/velox/vector/FlatVector.h b/velox/vector/FlatVector.h index 8df10806f4f6..61cf67e9a2f3 100644 --- a/velox/vector/FlatVector.h +++ b/velox/vector/FlatVector.h @@ -525,19 +525,6 @@ template <> void FlatVector::validate( const VectorValidateOptions& options) const; -template <> -void FlatVector::copyValuesAndNulls( - const BaseVector* source, - const SelectivityVector& rows, - const vector_size_t* toSourceRow); - -template <> -void FlatVector::copyValuesAndNulls( - const BaseVector* source, - vector_size_t targetIndex, - vector_size_t sourceIndex, - vector_size_t count); - template <> Buffer* FlatVector::getBufferWithSpace(vector_size_t size); diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index c828ab7cbd8e..265ed9a53928 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -31,8 +31,8 @@ #include "velox/vector/VectorTypeUtils.h" #include "velox/vector/tests/utils/VectorTestBase.h" -using namespace facebook::velox; -using facebook::velox::ComplexType; +namespace facebook::velox { +namespace { // LazyVector loader for testing. Minimal implementation that documents the API // contract. @@ -123,47 +123,6 @@ class VectorTest : public testing::Test, public test::VectorTestBase { return base; } - template - VectorPtr createBias(vector_size_t size, bool withNulls) { - using T = typename TypeTraits::NativeType; - using TBias = typename TypeTraits::NativeType; - BufferPtr buffer; - BufferPtr values = AlignedBuffer::allocate(size, pool_.get()); - values->setSize(size * sizeof(TBias)); - BufferPtr nulls; - uint64_t* rawNulls = nullptr; - if (withNulls) { - int32_t bytes = BaseVector::byteSize(size); - nulls = AlignedBuffer::allocate(bytes, pool_.get()); - rawNulls = nulls->asMutable(); - memset(rawNulls, bits::kNotNullByte, bytes); - nulls->setSize(bytes); - } - auto rawValues = values->asMutable(); - int32_t numNulls = 0; - constexpr int32_t kBias = 100; - for (int32_t i = 0; i < size; ++i) { - if (withNulls && i % 3 == 0) { - ++numNulls; - bits::setNull(rawNulls, i); - } else { - rawValues[i] = testValue(i, buffer) - kBias; - } - } - return std::make_shared>( - pool_.get(), - nulls, - size, - BiasKind, - std::move(values), - kBias, - SimpleVectorStats{}, - std::nullopt, - numNulls, - false, - size * sizeof(T)); - } - VectorPtr createRow(int32_t numRows, bool withNulls) { auto childType = ROW({"child_bigint", "child_string"}, {BIGINT(), VARCHAR()}); @@ -565,7 +524,9 @@ class VectorTest : public testing::Test, public test::VectorTestBase { testCopy(lazy, level - 1); } - void testCopyFromUnknown(const VectorPtr& vector) { + void testCopyFromAllNulls( + const VectorPtr& vector, + const VectorPtr& allNullSource) { SCOPED_TRACE(vector->toString()); const vector_size_t size = 1'000; @@ -575,8 +536,6 @@ class VectorTest : public testing::Test, public test::VectorTestBase { // Save a copy of the 'vector' to compare results after copy. auto vectorCopy = BaseVector::copy(*vector); - auto unknown = makeAllNullFlatVector(size); - // Copy every 3-rd row. SelectivityVector rowsToCopy(size, false); for (auto i = 0; i < size; i += 3) { @@ -597,7 +556,7 @@ class VectorTest : public testing::Test, public test::VectorTestBase { } } - vector->copy(unknown.get(), rowsToCopy, toSourceRow.data()); + vector->copy(allNullSource.get(), rowsToCopy, toSourceRow.data()); rowsToCopy.applyToSelected( [&](auto row) { EXPECT_TRUE(vector->isNullAt(row)) << "at " << row; }); @@ -610,7 +569,9 @@ class VectorTest : public testing::Test, public test::VectorTestBase { }); } - void testCopySingleRangeFromUnknown(const VectorPtr& vector) { + void testCopySingleRangeFromAllNulls( + const VectorPtr& vector, + const VectorPtr& allNullSource) { SCOPED_TRACE(vector->toString()); const vector_size_t size = 1'000; @@ -620,9 +581,7 @@ class VectorTest : public testing::Test, public test::VectorTestBase { // Save a copy of the 'vector' to compare results after copy. auto vectorCopy = BaseVector::copy(*vector); - auto unknown = makeAllNullFlatVector(size); - - vector->copy(unknown.get(), 40, 33, 78); + vector->copy(allNullSource.get(), 40, 33, 78); for (auto i = 0; i < size; ++i) { if (i < 40 || i >= 40 + 78) { @@ -636,7 +595,9 @@ class VectorTest : public testing::Test, public test::VectorTestBase { } } - void testCopyRangesFromUnknown(const VectorPtr& vector) { + void testCopyRangesFromAllNulls( + const VectorPtr& vector, + const VectorPtr& allNullSource) { SCOPED_TRACE(vector->toString()); const vector_size_t size = 1'000; @@ -646,8 +607,6 @@ class VectorTest : public testing::Test, public test::VectorTestBase { // Save a copy of the 'vector' to compare results after copy. auto vectorCopy = BaseVector::copy(*vector); - auto unknown = makeAllNullFlatVector(size); - std::vector rangesToCopy = { {0, 0, 7}, {10, 12, 5}, @@ -664,7 +623,7 @@ class VectorTest : public testing::Test, public test::VectorTestBase { {0, 960, 40}, }; - vector->copyRanges(unknown.get(), rangesToCopy); + vector->copyRanges(allNullSource.get(), rangesToCopy); for (const auto& range : rangesToCopy) { for (auto i = 0; i < range.count; ++i) { @@ -1239,18 +1198,130 @@ TEST_F(VectorTest, copyToAllNullsFlatVector) { } } +template +static VectorPtr createAllNullsFlatVector( + vector_size_t size, + memory::MemoryPool* pool, + const TypePtr& type) { + using T = typename TypeTraits::NativeType; + + return std::make_shared>( + pool, + type, + allocateNulls(size, pool, bits::kNull), + size, + nullptr, + std::vector()); +} + +VectorPtr createAllNullsVector( + const TypePtr& type, + vector_size_t size, + memory::MemoryPool* pool) { + auto kind = type->kind(); + switch (kind) { + case TypeKind::ROW: { + std::vector children(type->size(), nullptr); + return std::make_shared( + pool, type, allocateNulls(size, pool, bits::kNull), size, children); + } + case TypeKind::ARRAY: + return std::make_shared( + pool, + type, + allocateNulls(size, pool, bits::kNull), + size, + allocateSizes(size, pool), + allocateSizes(size, pool), + nullptr); + case TypeKind::MAP: + return std::make_shared( + pool, + type, + allocateNulls(size, pool, bits::kNull), + size, + allocateSizes(size, pool), + allocateSizes(size, pool), + nullptr, + nullptr); + default: + return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH( + createAllNullsFlatVector, kind, size, pool, type); + } +} + +TEST_F(VectorTest, copyFromAllNulls) { + vector_size_t size = 1'000; + + auto test = [&](const auto& makeVectorFunc) { + auto vector = makeVectorFunc(); + auto allNullSource = + createAllNullsVector(vector->type(), vector->size(), pool()); + + testCopyFromAllNulls(vector, allNullSource); + + vector = makeVectorFunc(); + testCopySingleRangeFromAllNulls(vector, allNullSource); + + vector = makeVectorFunc(); + testCopyRangesFromAllNulls(vector, allNullSource); + }; + + // Copy to BIGINT. + test([&]() { + return makeFlatVector(size, [](auto row) { return row; }); + }); + + // Copy to BOOLEAN. + test([&]() { + return makeFlatVector(size, [](auto row) { return row % 7 == 3; }); + }); + + // Copy to VARCHAR. + test([&]() { + return makeFlatVector( + size, [](auto row) { return std::string(row % 17, 'x'); }); + }); + + // Copy to ARRAY. + test([&]() { + return makeArrayVector( + size, [](auto row) { return row % 7; }, [](auto row) { return row; }); + }); + + // Copy to MAP. + test([&]() { + return makeMapVector( + size, + [](auto row) { return row % 7; }, + [](auto row) { return row; }, + [](auto row) { return row * 0.1; }); + }); + + // TODO Enable after fixing + // https://github.com/facebookincubator/velox/issues/6612 + // // Copy to ROW. + // test([&]() { + // return makeRowVector({ + // makeFlatVector(size, [](auto row) { return row; }), + // makeFlatVector(size, [](auto row) { return row * 0.1; }), + // }); + // }); +} + TEST_F(VectorTest, copyFromUnknown) { vector_size_t size = 1'000; + auto unknown = makeAllNullFlatVector(size); auto test = [&](const auto& makeVectorFunc) { auto vector = makeVectorFunc(); - testCopyFromUnknown(vector); + testCopyFromAllNulls(vector, unknown); vector = makeVectorFunc(); - testCopySingleRangeFromUnknown(vector); + testCopySingleRangeFromAllNulls(vector, unknown); vector = makeVectorFunc(); - testCopyRangesFromUnknown(vector); + testCopyRangesFromAllNulls(vector, unknown); }; // Copy to BIGINT. @@ -2910,3 +2981,6 @@ TEST_F(VectorTest, containsNullAtStructs) { EXPECT_TRUE(data->containsNullAt(4)); EXPECT_FALSE(data->containsNullAt(5)); } + +} // namespace +} // namespace facebook::velox