From 641d53d3c350cc1f3a7afe17f47d9bab3bdb3948 Mon Sep 17 00:00:00 2001 From: Mryange Date: Tue, 3 Dec 2024 16:49:50 +0800 Subject: [PATCH] [fix](agg) Fixed a core dump when using the IPv6 type in array_agg. (#44877) ### What problem does this PR solve? This fixes a core dump introduced by https://github.com/apache/doris/pull/40697. The BE aborts with the following failure: ``` F20241202 18:56:00.498481 233860 assert_cast.h:57] Bad cast from type:doris::vectorized::ColumnVector to doris::vectorized::ColumnVector > *** Check failure stack trace: *** @ 0x562c12168fb6 google::LogMessage::SendToLog() @ 0x562c12165a00 google::LogMessage::Flush() @ 0x562c121697f9 google::LogMessageFatal::~LogMessageFatal() @ 0x562c0acb66a6 _ZZ11assert_castIRN5doris10vectorized12ColumnVectorIN4wide7integerILm128EjEEEEL18TypeCheckOnRelease1ERNS1_7IColumnEET_OT1_ENKUlOSB_E_clISA_EES7_SE_ @ 0x562c0acb64e7 assert_cast<>() @ 0x562c0acb637d doris::vectorized::AggregateFunctionArrayAggData<>::insert_result_into() @ 0x562c11f12282 doris::pipeline::AggLocalState::_get_results_without_key() @ 0x562c11f3a251 std::_Function_handler<>::_M_invoke() @ 0x562c11f13ee8 doris::pipeline::AggSourceOperatorX::get_block() @ 0x562c1174e3cf doris::pipeline::OperatorXBase::get_block_after_projects() @ 0x562c120ad87b doris::pipeline::PipelineTask::execute() @ 0x562c120bb7b2 doris::pipeline::TaskScheduler::_do_work() @ 0x562c081ef935 doris::ThreadPool::dispatch_thread() @ 0x562c081e6321 doris::Thread::supervise_thread() @ 0x7f446f9b01ca start_thread @ 0x7f447039fe73 __GI___clone @ (nil) (unknown) *** Query id: c24a997f61b847d2-adece222f56b3b9e *** *** is nereids: 1 *** *** tablet id: 0 *** *** Aborted at 1733136960 (unix time) try "date -d @1733136960" if you are using GNU date *** *** Current BE git commitID: 1551ef9e63 *** *** SIGABRT unknown detail explain (@0x47000039002) received by PID 233474 (TID 233860 OR 0x7f436d1f2700) from PID 233474; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at 
/mnt/disk2/yanxuecheng/doris/be/src/common/signal_handler.h:421 1# 0x00007F44703B4B50 in /lib64/libc.so.6 2# gsignal in /lib64/libc.so.6 3# __GI_abort in /lib64/libc.so.6 4# 0x0000562C1217388D in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 5# 0x0000562C12165ECA in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 6# google::LogMessage::SendToLog() in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 7# google::LogMessage::Flush() in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 8# google::LogMessageFatal::~LogMessageFatal() in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 9# doris::vectorized::ColumnVector >& assert_cast >&, (TypeCheckOnRelease)1, doris::vectorized::IColumn&>(doris::vectorized::IColumn&)::{lambda(auto:1&&)#1}::operator()(doris::vectorized::IColumn&) const at /mnt/disk2/yanxuecheng/doris/be/src/vec/common/assert_cast.h:57 10# doris::vectorized::ColumnVector >& assert_cast >&, (TypeCheckOnRelease)1, doris::vectorized::IColumn&>(doris::vectorized::IColumn&) at /mnt/disk2/yanxuecheng/doris/be/src/vec/common/assert_cast.h:72 11# doris::vectorized::AggregateFunctionArrayAggData >::insert_result_into(doris::vectorized::IColumn&) const in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 12# doris::pipeline::AggLocalState::_get_results_without_key(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/exec/aggregation_source_operator.cpp:349 13# std::_Function_handler, std::_Placeholder<2>, std::_Placeholder<3>))(doris::RuntimeState*, doris::vectorized::Block*, bool*)> >::_M_invoke(std::_Any_data const&, doris::RuntimeState*&&, doris::vectorized::Block*&&, bool*&&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290 14# doris::pipeline::AggSourceOperatorX::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/exec/aggregation_source_operator.cpp:439 
15# doris::pipeline::OperatorXBase::get_block_after_projects(doris::RuntimeState*, doris::vectorized::Block*, bool*) in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 16# doris::pipeline::PipelineTask::execute(bool*) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/pipeline_task.cpp:378 17# doris::pipeline::TaskScheduler::_do_work(int) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:138 18# doris::ThreadPool::dispatch_thread() in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 19# doris::Thread::supervise_thread(void*) at /mnt/disk2/yanxuecheng/doris/be/src/util/thread.cpp:499 20# start_thread in /lib64/libpthread.so.0 21# __clone in /lib64/libc.so.6 ``` --- .../aggregate_function_collect.cpp | 10 ++++++++-- .../data/query_p0/aggregate/array_agg.out | 3 +++ .../suites/query_p0/aggregate/array_agg.groovy | 16 ++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp index d726b7c6355318..552d47d3567f9f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp @@ -72,12 +72,18 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n if (which.is_date_or_datetime()) { return do_create_agg_function_collect(distinct, argument_types, result_is_nullable); - } else if (which.is_date_v2() || which.is_ipv4()) { + } else if (which.is_date_v2()) { return do_create_agg_function_collect(distinct, argument_types, result_is_nullable); - } else if (which.is_date_time_v2() || which.is_ipv6()) { + } else if (which.is_date_time_v2()) { return do_create_agg_function_collect(distinct, argument_types, result_is_nullable); + } else if (which.is_ipv6()) { + return do_create_agg_function_collect(distinct, argument_types, + result_is_nullable); + } else if (which.is_ipv4()) { + return 
do_create_agg_function_collect(distinct, argument_types, + result_is_nullable); } else if (which.is_string()) { return do_create_agg_function_collect( distinct, argument_types, result_is_nullable); diff --git a/regression-test/data/query_p0/aggregate/array_agg.out b/regression-test/data/query_p0/aggregate/array_agg.out index 1fe44df3a144bc..62ffb5fcf475c0 100644 --- a/regression-test/data/query_p0/aggregate/array_agg.out +++ b/regression-test/data/query_p0/aggregate/array_agg.out @@ -251,3 +251,6 @@ 8 [{"id":8}, {"id":8}, {"id":8}, {"id":8}] 9 [{"id":9}, {"id":9}, {"id":9}, {"id":9}] +-- !select -- +[null, "0.0.0.123", "0.0.12.42", "0.119.130.67"] [null, "::855d", "::0.4.221.183", "::a:7429:d0d6:6e08:9f5f"] + diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy b/regression-test/suites/query_p0/aggregate/array_agg.groovy index 217285b572c538..42fb3b131a4f86 100644 --- a/regression-test/suites/query_p0/aggregate/array_agg.groovy +++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy @@ -277,8 +277,24 @@ suite("array_agg") { order_qt_sql_array_agg_map """ SELECT id, array_agg(km) FROM test_array_agg_complex GROUP BY id ORDER BY id """ order_qt_sql_array_agg_struct """ SELECT id, array_agg(ks) FROM test_array_agg_complex GROUP BY id ORDER BY id """ + + sql """ DROP TABLE IF EXISTS test_array_agg_ip;""" + sql """ + CREATE TABLE test_array_agg_ip( + k1 BIGINT , + k4 ipv4 , + k6 ipv6 , + s string + ) DISTRIBUTED BY HASH(k1) BUCKETS 1 PROPERTIES("replication_num" = "1"); + """ + sql """ insert into test_array_agg_ip values(1,123,34141,"0.0.0.123") , (2,3114,318903,"0.0.0.123") , (3,7832131,192837891738927931231,"2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D"),(4,null,null,"2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D"); """ + + + qt_select """select array_sort(array_agg(k4)),array_sort(array_agg(k6)) from test_array_agg_ip """ + sql "DROP TABLE `test_array_agg`" sql "DROP TABLE `test_array_agg1`" sql "DROP TABLE `test_array_agg_int`" sql "DROP TABLE 
`test_array_agg_decimal`" + sql "DROP TABLE `test_array_agg_ip`" }