From 3808e0c1170d4f39b0bd9ac1643bd468fdbd6239 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Mon, 18 Sep 2023 20:27:09 -0700 Subject: [PATCH] Fix RowContainer::hash and equals for inputs of type UNKNOWN (#6619) Summary: Fixes https://github.com/facebookincubator/velox/issues/6616 Pull Request resolved: https://github.com/facebookincubator/velox/pull/6619 Reviewed By: bikramSingh91 Differential Revision: D49385313 Pulled By: mbasmanova fbshipit-source-id: fb16a50ced469791cfab088568829969c2302243 --- velox/exec/RowContainer.cpp | 8 ++++ velox/exec/RowContainer.h | 14 +++---- velox/exec/tests/RowContainerTest.cpp | 54 +++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 14 deletions(-) diff --git a/velox/exec/RowContainer.cpp b/velox/exec/RowContainer.cpp index d4f5bd38caf1..fad8a0fe8fc7 100644 --- a/velox/exec/RowContainer.cpp +++ b/velox/exec/RowContainer.cpp @@ -587,6 +587,14 @@ void RowContainer::hash( folly::Range rows, bool mix, uint64_t* result) { + if (typeKinds_[column] == TypeKind::UNKNOWN) { + for (auto i = 0; i < rows.size(); ++i) { + result[i] = mix ? bits::hashMix(result[i], BaseVector::kNullHash) + : BaseVector::kNullHash; + } + return; + } + bool nullable = column >= keyTypes_.size() || nullableKeys_; VELOX_DYNAMIC_TYPE_DISPATCH( hashTyped, diff --git a/velox/exec/RowContainer.h b/velox/exec/RowContainer.h index 9be9b99e78f5..682fd00a64e3 100644 --- a/velox/exec/RowContainer.h +++ b/velox/exec/RowContainer.h @@ -1338,18 +1338,18 @@ inline bool RowContainer::equals( RowColumn column, const DecodedVector& decoded, vector_size_t index) { + auto typeKind = decoded.base()->typeKind(); + if (typeKind == TypeKind::UNKNOWN) { + return isNullAt(row, column.nullByte(), column.nullMask()); + } + if (!mayHaveNulls) { return VELOX_DYNAMIC_TYPE_DISPATCH( - equalsNoNulls, - decoded.base()->typeKind(), - row, - column.offset(), - decoded, - index); + equalsNoNulls, typeKind, row, column.offset(), decoded, index); } else { return VELOX_DYNAMIC_TYPE_DISPATCH( equalsWithNulls, - decoded.base()->typeKind(), + typeKind, row, column.offset(), column.nullByte(), diff --git a/velox/exec/tests/RowContainerTest.cpp b/velox/exec/tests/RowContainerTest.cpp index 65bf8f3195cd..35214276d7be 100644 --- a/velox/exec/tests/RowContainerTest.cpp +++ b/velox/exec/tests/RowContainerTest.cpp @@ -256,6 +256,18 @@ class RowContainerTest : public exec::test::RowContainerTestBase { EXPECT_EQ(usage, sum); } + std::vector store( + RowContainer& rowContainer, + DecodedVector& decodedVector, + vector_size_t size) { + std::vector rows(size); + for (size_t row = 0; row < size; ++row) { + rows[row] = rowContainer.newRow(); + rowContainer.store(decodedVector, row, rows[row], 0); + } + return rows; + } + // Stores the input vector in Row Container, extracts it and compares. Returns // the container. std::unique_ptr roundTrip(const VectorPtr& input) { @@ -265,13 +277,8 @@ class RowContainerTest : public exec::test::RowContainerTestBase { // Store the vector in the rowContainer. auto rowContainer = std::make_unique(types, pool_.get()); auto size = input->size(); - SelectivityVector allRows(size); - std::vector rows(size); - DecodedVector decoded(*input, allRows); - for (size_t row = 0; row < size; ++row) { - rows[row] = rowContainer->newRow(); - rowContainer->store(decoded, row, rows[row], 0); - } + DecodedVector decoded(*input); + auto rows = store(*rowContainer, decoded, size); testExtractColumn(*rowContainer, rows, 0, input); return rowContainer; @@ -1227,3 +1234,36 @@ TEST_F(RowContainerTest, mixedFree) { data1->checkConsistency(); data2->checkConsistency(); } + +TEST_F(RowContainerTest, unknown) { + std::vector types = {UNKNOWN()}; + auto rowContainer = std::make_unique(types, pool_.get()); + + auto data = makeRowVector({ + makeAllNullFlatVector(5), + }); + + auto size = data->size(); + DecodedVector decoded(*data->childAt(0)); + auto rows = store(*rowContainer, decoded, size); + + std::vector hashes(size, 0); + rowContainer->hash( + 0, folly::Range(rows.data(), rows.size()), false /*mix*/, hashes.data()); + for (auto hash : hashes) { + ASSERT_EQ(BaseVector::kNullHash, hash); + } + + // Fill in hashes with sequential numbers: 0, 1, 2,.. + std::iota(hashes.begin(), hashes.end(), 0); + rowContainer->hash( + 0, folly::Range(rows.data(), rows.size()), true /*mix*/, hashes.data()); + for (auto i = 0; i < size; ++i) { + ASSERT_EQ(bits::hashMix(i, BaseVector::kNullHash), hashes[i]); + } + + for (size_t row = 0; row < size; ++row) { + ASSERT_TRUE(rowContainer->equals( + rows[row], rowContainer->columnAt(0), decoded, row)); + } +}