From 3d8a8812bdd9b4f5fb6b2ca81253e023ddfaef0f Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Thu, 16 Nov 2023 19:42:54 -0800 Subject: [PATCH] Fix conversion of VARBINARY values to variant in QueryAssertions.* (#7618) Summary: VARBINARY result from Velox and DuckDB used to be converted to variant with TypeKind::VARCHAR causing confusing failures in the AggregationFuzzer. Pull Request resolved: https://github.com/facebookincubator/velox/pull/7618 Reviewed By: xiaoxmeng Differential Revision: D51416568 Pulled By: mbasmanova fbshipit-source-id: e673da4d668dc93f933a40a0095ab1d5dd3506e9 --- velox/exec/tests/QueryAssertionsTest.cpp | 17 ++++ velox/exec/tests/utils/QueryAssertions.cpp | 80 +++++++++---------- .../tests/BloomFilterAggAggregateTest.cpp | 24 ++---- 3 files changed, 64 insertions(+), 57 deletions(-) diff --git a/velox/exec/tests/QueryAssertionsTest.cpp b/velox/exec/tests/QueryAssertionsTest.cpp index 37730846e005..93ea8009532d 100644 --- a/velox/exec/tests/QueryAssertionsTest.cpp +++ b/velox/exec/tests/QueryAssertionsTest.cpp @@ -452,4 +452,21 @@ TEST_F(QueryAssertionsTest, nullVariant) { assertQuery(plan, "SELECT * FROM tmp"); } +TEST_F(QueryAssertionsTest, varbinary) { + auto data = makeRowVector({makeFlatVector( + {"Short string", "Longer strings...", "abc"}, VARBINARY())}); + + auto rowType = asRowType(data->type()); + + createDuckDbTable({data}); + + auto duckResult = duckDbQueryRunner_.execute("SELECT * FROM tmp", rowType); + ASSERT_EQ(duckResult.size(), data->size()); + ASSERT_EQ(duckResult.begin()->begin()->kind(), TypeKind::VARBINARY); + ASSERT_TRUE(assertEqualResults(duckResult, rowType, {data})); + + auto plan = PlanBuilder().values({data}).planNode(); + assertQuery(plan, "SELECT * FROM tmp"); +} + } // namespace facebook::velox::test diff --git a/velox/exec/tests/utils/QueryAssertions.cpp b/velox/exec/tests/utils/QueryAssertions.cpp index 42704ad43c69..4e3c733a54c7 100644 --- a/velox/exec/tests/utils/QueryAssertions.cpp +++ b/velox/exec/tests/utils/QueryAssertions.cpp @@ -188,41 +188,40 @@ ::duckdb::Value duckValueAt( } template -velox::variant -variantAt(::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) { +variant variantAt(::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) { using T = typename KindToFlatVector::WrapperType; - return velox::variant(dataChunk->GetValue(column, row).GetValue()); + return variant(dataChunk->GetValue(column, row).GetValue()); } template <> -velox::variant variantAt( +variant variantAt( ::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) { - return velox::variant( + return variant( StringView(::duckdb::StringValue::Get(dataChunk->GetValue(column, row)))); } template <> -velox::variant variantAt( +variant variantAt( ::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) { - return velox::variant( + return variant::binary( StringView(::duckdb::StringValue::Get(dataChunk->GetValue(column, row)))); } template <> -velox::variant variantAt( +variant variantAt( ::duckdb::DataChunk* dataChunk, int32_t row, int32_t column) { - return velox::variant::timestamp(duckdbTimestampToVelox( + return variant::timestamp(duckdbTimestampToVelox( dataChunk->GetValue(column, row).GetValue<::duckdb::timestamp_t>())); } template -velox::variant variantAt(const ::duckdb::Value& value) { +variant variantAt(const ::duckdb::Value& value) { if (value.type() == ::duckdb::LogicalType::INTERVAL) { return ::duckdb::Interval::GetMicro(value.GetValue<::duckdb::interval_t>()); } else if (value.type() == ::duckdb::LogicalType::DATE) { @@ -231,34 +230,32 @@ velox::variant variantAt(const ::duckdb::Value& value) { // NOTE: duckdb only support native cpp type for GetValue so we need to use // DeepCopiedType instead of WrapperType here. using T = typename TypeTraits::DeepCopiedType; - return velox::variant(value.GetValue()); + return variant(value.GetValue()); } } template <> -velox::variant variantAt(const ::duckdb::Value& value) { - return velox::variant::timestamp( +variant variantAt(const ::duckdb::Value& value) { + return variant::timestamp( duckdbTimestampToVelox(value.GetValue<::duckdb::timestamp_t>())); } template <> -velox::variant variantAt(const ::duckdb::Value& value) { - return velox::variant(StringView(::duckdb::StringValue::Get(value))); +variant variantAt(const ::duckdb::Value& value) { + return variant(StringView(::duckdb::StringValue::Get(value))); } template <> -velox::variant variantAt(const ::duckdb::Value& value) { - return velox::variant(StringView(::duckdb::StringValue::Get(value))); +variant variantAt(const ::duckdb::Value& value) { + return variant::binary(StringView(::duckdb::StringValue::Get(value))); } variant nullVariant(const TypePtr& type) { return variant(type->kind()); } -velox::variant rowVariantAt( - const ::duckdb::Value& vector, - const TypePtr& rowType) { - std::vector values; +variant rowVariantAt(const ::duckdb::Value& vector, const TypePtr& rowType) { + std::vector values; const auto& structValue = ::duckdb::StructValue::GetChildren(vector); for (size_t i = 0; i < structValue.size(); ++i) { auto currChild = structValue[i]; @@ -274,12 +271,10 @@ velox::variant rowVariantAt( values.push_back(value); } } - return velox::variant::row(std::move(values)); + return variant::row(std::move(values)); } -velox::variant mapVariantAt( - const ::duckdb::Value& vector, - const TypePtr& mapType) { +variant mapVariantAt(const ::duckdb::Value& vector, const TypePtr& mapType) { std::map map; const auto& mapValue = ::duckdb::StructValue::GetChildren(vector); @@ -309,10 +304,10 @@ velox::variant mapVariantAt( } map.insert({variantKey, variantValue}); } - return velox::variant::map(map); + return variant::map(map); } -velox::variant arrayVariantAt( +variant arrayVariantAt( const ::duckdb::Value& vector, const TypePtr& arrayType) { std::vector array; @@ -334,7 +329,7 @@ velox::variant arrayVariantAt( array.push_back(variant); } } - return velox::variant::array(std::move(array)); + return variant::array(std::move(array)); } std::vector materialize( @@ -389,14 +384,19 @@ std::vector materialize( } template -velox::variant variantAt(VectorPtr vector, int32_t row) { +variant variantAt(VectorPtr vector, int32_t row) { using T = typename KindToFlatVector::WrapperType; - return velox::variant(vector->as>()->valueAt(row)); + return variant(vector->as>()->valueAt(row)); +} + +template <> +variant variantAt(VectorPtr vector, int32_t row) { + return variant::binary(vector->as>()->valueAt(row)); } variant variantAt(const VectorPtr& vector, vector_size_t row); -velox::variant arrayVariantAt(const VectorPtr& vector, vector_size_t row) { +variant arrayVariantAt(const VectorPtr& vector, vector_size_t row) { auto arrayVector = vector->wrappedVector()->as(); auto& elements = arrayVector->elements(); @@ -404,16 +404,16 @@ velox::variant arrayVariantAt(const VectorPtr& vector, vector_size_t row) { auto offset = arrayVector->offsetAt(wrappedRow); auto size = arrayVector->sizeAt(wrappedRow); - std::vector array; + std::vector array; array.reserve(size); for (auto i = 0; i < size; i++) { auto innerRow = offset + i; array.push_back(variantAt(elements, innerRow)); } - return velox::variant::array(array); + return variant::array(array); } -velox::variant mapVariantAt(const VectorPtr& vector, vector_size_t row) { +variant mapVariantAt(const VectorPtr& vector, vector_size_t row) { auto mapVector = vector->wrappedVector()->as(); auto& mapKeys = mapVector->mapKeys(); auto& mapValues = mapVector->mapValues(); @@ -429,18 +429,18 @@ velox::variant mapVariantAt(const VectorPtr& vector, vector_size_t row) { auto value = variantAt(mapValues, innerRow); map.insert({key, value}); } - return velox::variant::map(map); + return variant::map(map); } -velox::variant rowVariantAt(const VectorPtr& vector, vector_size_t row) { +variant rowVariantAt(const VectorPtr& vector, vector_size_t row) { auto rowValues = vector->wrappedVector()->as(); auto wrappedRow = vector->wrappedIndex(row); - std::vector values; + std::vector values; for (auto& child : rowValues->children()) { values.push_back(variantAt(child, wrappedRow)); } - return velox::variant::row(std::move(values)); + return variant::row(std::move(values)); } variant variantAt(const VectorPtr& vector, vector_size_t row) { @@ -1404,9 +1404,7 @@ std::shared_ptr assertQuery( return result.first->task(); } -velox::variant readSingleValue( - const core::PlanNodePtr& plan, - int32_t maxDrivers) { +variant readSingleValue(const core::PlanNodePtr& plan, int32_t maxDrivers) { CursorParameters params; params.planNode = plan; params.maxDrivers = maxDrivers; diff --git a/velox/functions/sparksql/aggregates/tests/BloomFilterAggAggregateTest.cpp b/velox/functions/sparksql/aggregates/tests/BloomFilterAggAggregateTest.cpp index 87faa40f5359..36ec41818e93 100644 --- a/velox/functions/sparksql/aggregates/tests/BloomFilterAggAggregateTest.cpp +++ b/velox/functions/sparksql/aggregates/tests/BloomFilterAggAggregateTest.cpp @@ -32,7 +32,7 @@ class BloomFilterAggAggregateTest allowInputShuffle(); } - std::string getSerializedBloomFilter(int32_t capacity) { + VectorPtr getSerializedBloomFilter(int32_t capacity) { BloomFilter bloomFilter; bloomFilter.reset(capacity); for (auto i = 0; i < 9; ++i) { @@ -41,7 +41,7 @@ class BloomFilterAggAggregateTest std::string data; data.resize(bloomFilter.serializedSize()); bloomFilter.serialize(data.data()); - return data; + return makeConstant(StringView(data), 1, VARBINARY()); } }; } // namespace @@ -49,10 +49,7 @@ class BloomFilterAggAggregateTest TEST_F(BloomFilterAggAggregateTest, basic) { auto vectors = {makeRowVector({makeFlatVector( 100, [](vector_size_t row) { return row % 9; })})}; - auto bloomFilter = getSerializedBloomFilter(4); - auto expected = { - makeRowVector({makeConstant(StringView(bloomFilter), 1)})}; - + auto expected = {makeRowVector({getSerializedBloomFilter(4)})}; testAggregations(vectors, {}, {"bloom_filter_agg(c0, 5, 64)"}, expected); } @@ -60,21 +57,17 @@ TEST_F(BloomFilterAggAggregateTest, bloomFilterAggArgument) { auto vectors = {makeRowVector({makeFlatVector( 100, [](vector_size_t row) { return row % 9; })})}; - auto bloomFilter1 = getSerializedBloomFilter(3); - auto expected1 = { - makeRowVector({makeConstant(StringView(bloomFilter1), 1)})}; + auto expected1 = {makeRowVector({getSerializedBloomFilter(3)})}; testAggregations(vectors, {}, {"bloom_filter_agg(c0, 6)"}, expected1); + // This capacity is kMaxNumBits / 16. - auto bloomFilter2 = getSerializedBloomFilter(262144); - auto expected2 = { - makeRowVector({makeConstant(StringView(bloomFilter2), 1)})}; + auto expected2 = {makeRowVector({getSerializedBloomFilter(262144)})}; testAggregations(vectors, {}, {"bloom_filter_agg(c0)"}, expected2); } TEST_F(BloomFilterAggAggregateTest, emptyInput) { auto vectors = {makeRowVector({makeFlatVector({})})}; - auto expected = {makeRowVector( - {makeNullableFlatVector({std::nullopt}, VARBINARY())})}; + auto expected = {makeRowVector({makeNullConstant(TypeKind::VARBINARY, 1)})}; testAggregations(vectors, {}, {"bloom_filter_agg(c0, 5, 64)"}, expected); } @@ -91,9 +84,8 @@ TEST_F(BloomFilterAggAggregateTest, nullBloomFilter) { TEST_F(BloomFilterAggAggregateTest, config) { auto vector = {makeRowVector({makeFlatVector( 100, [](vector_size_t row) { return row % 9; })})}; - auto bloomFilter = getSerializedBloomFilter(100); std::vector expected = { - makeRowVector({makeConstant(StringView(bloomFilter), 1)})}; + makeRowVector({getSerializedBloomFilter(100)})}; // This config will decide the bloom filter capacity, the expected value is // the serialized bloom filter, it should be consistent.