Skip to content

Commit 71bf354

Browse files
xinyiZzz authored and Your Name committed
[fix](arrow-flight-sql) Fix Doris NULL column conversion to arrow batch (#43929)
### What problem does this PR solve?

Problem Summary:

The representation of NULL columns in Doris is special: it is `DataTypeNull<DataTypeNumber::Uint8>`. When serializing into an arrow batch, `Uint8` uses `arrow::BooleanBuilder`, which does not match the expected `arrow::NullBuilder`.

This fixes the following crash:

```
*** Query id: fd32741526804c1e-bc016473fd8f3aa3 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1731327262 (unix time) try "date -d @1731327262" if you are using GNU date ***
*** Current BE git commitID: 653e315 ***
*** SIGSEGV address not mapped to object (@0x100000024) received by PID 1442863 (TID 1443456 OR 0x7f8b8cdea700) from PID 36; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/common/signal_handler.h:421
 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
 2# JVM_handle_linux_signal in /mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
 3# 0x00007F8CA1F38B50 in /lib64/libc.so.6
 4# 0x000055FC45E5B2D3 in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 5# arrow::BooleanBuilder::AppendValues(unsigned char const*, long, unsigned char const*) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 6# doris::vectorized::DataTypeNumberSerDe<unsigned char>::write_column_to_arrow(doris::vectorized::IColumn const&, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 15ul, 16ul> const*, arrow::ArrayBuilder*, int, int, cctz::time_zone const&) const at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/data_types/serde/data_type_number_serde.cpp:86
 7# doris::FromBlockConverter::convert(std::shared_ptr<arrow::RecordBatch>*) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/util/arrow/block_convertor.cpp:390
 8# doris::convert_to_arrow_batch(doris::vectorized::Block const&, std::shared_ptr<arrow::Schema> const&, arrow::MemoryPool*, std::shared_ptr<arrow::RecordBatch>*, cctz::time_zone const&) in /mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
 9# doris::vectorized::VArrowFlightResultWriter::write(doris::vectorized::Block&) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/varrow_flight_result_writer.cpp:76
10# doris::vectorized::VResultSink::send(doris::RuntimeState*, doris::vectorized::Block*, bool) at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/vresult_sink.cpp:149
11# doris::PlanFragmentExecutor::open_vectorized_internal() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:341
12# doris::PlanFragmentExecutor::open() at /mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:273
```
1 parent d38c722 commit 71bf354

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

be/src/vec/data_types/serde/data_type_number_serde.cpp

+15-6
Original file line number | Diff line number | Diff line change
@@ -78,12 +78,21 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const
7878
auto arrow_null_map = revert_null_map(null_map, start, end);
7979
auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : arrow_null_map.data();
8080
if constexpr (std::is_same_v<T, UInt8>) {
81-
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
82-
checkArrowStatus(
83-
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
84-
end - start,
85-
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
86-
column.get_name(), array_builder->type()->name());
81+
auto* null_builder = dynamic_cast<arrow::NullBuilder*>(array_builder);
82+
if (null_builder) {
83+
for (size_t i = start; i < end; ++i) {
84+
checkArrowStatus(null_builder->AppendNull(), column.get_name(),
85+
null_builder->type()->name());
86+
}
87+
} else {
88+
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
89+
checkArrowStatus(
90+
builder.AppendValues(reinterpret_cast<const uint8_t*>(col_data.data() + start),
91+
end - start,
92+
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
93+
column.get_name(), array_builder->type()->name());
94+
}
95+
8796
} else if constexpr (std::is_same_v<T, Int128>) {
8897
auto& string_builder = assert_cast<arrow::StringBuilder&>(*array_builder);
8998
for (size_t i = start; i < end; ++i) {

0 commit comments

Comments
 (0)