Skip to content

Commit 1601d75

Browse files
authored
[Fix](merge-on-write) Fix MergeIndexDeleteBitmapCalculator::calculate_one() coredump (apache#44284)
### What problem does this PR solve? Problem Summary: `MergeIndexDeleteBitmapCalculatorContext::get_current_key()` may return a non-OK status when it encounters a memory allocation failure, which makes `MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()` return an incorrect result and breaks some assumptions made during the multiway merge, leading to a coredump. ``` 1# 0x00007F507D0B3520 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill at ./nptl/pthread_kill.c:89 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# 0x000055E3A805DD7D in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 6# 0x000055E3A805047A in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 7# google::LogMessage::SendToLog() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 8# google::LogMessage::Flush() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 9# google::LogMessageFatal::~LogMessageFatal() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 10# doris::MergeIndexDeleteBitmapCalculatorContext::seek_at_or_after(doris::Slice const&) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 11# doris::MergeIndexDeleteBitmapCalculator::calculate_one(doris::RowLocation&) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/delete_bitmap_calculator.cpp:197 12# doris::MergeIndexDeleteBitmapCalculator::calculate_all(std::shared_ptr) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 13# doris::Tablet::calc_delete_bitmap_between_segments(std::shared_ptr, std::vector, std::allocator > > const&, std::shared_ptr) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:4075 14# doris::Tablet::update_delete_bitmap_without_lock(std::shared_ptr const&, std::vector, std::allocator > > const*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:3468 15# doris::Tablet::revise_tablet_meta(std::vector, std::allocator > > const&, std::vector, std::allocator > > 
const&, bool) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/tablet.cpp:415 16# doris::EngineCloneTask::_finish_incremental_clone(doris::Tablet*, std::shared_ptr const&, long) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/task/engine_clone_task.cpp:795 17# doris::EngineCloneTask::_finish_clone(doris::Tablet*, std::__cxx11::basic_string, std::allocator > const&, long, bool) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 18# doris::EngineCloneTask::_do_clone() in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 19# doris::EngineCloneTask::execute() at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/olap/task/engine_clone_task.cpp:159 20# doris::clone_callback(doris::StorageEngine&, doris::TMasterInfo const&, doris::TAgentTaskRequest const&) in /mnt/hdd01/ci/doris-deploy-branch-2.1-local/be/lib/doris_be 21# std::_Function_handler(doris::TAgentTaskRequest const&) const::{lambda()#1}>::_M_invoke(std::_Any_data const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 22# doris::ThreadPool::dispatch_thread() at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/util/threadpool.cpp:551 23# doris::Thread::supervise_thread(void*) at /home/zcp/repo_center/doris_branch-2.1/doris/be/src/util/thread.cpp:499 24# start_thread at ./nptl/pthread_create.c:442 25# 0x00007F507D197850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83 ```
1 parent 2a4885e commit 1601d75

File tree

1 file changed

+38
-32
lines changed

1 file changed

+38
-32
lines changed

be/src/olap/delete_bitmap_calculator.cpp

+38-32
Original file line number | Diff line number | Diff line change
@@ -90,8 +90,10 @@ bool MergeIndexDeleteBitmapCalculatorContext::Comparator::operator()(
9090
// std::priority_queue is a max heap, and the function should return the result of `lhs < rhs`
9191
// so if the result of the function is true, rhs will be popped before lhs
9292
Slice key1, key2;
93-
RETURN_IF_ERROR(lhs->get_current_key(key1));
94-
RETURN_IF_ERROR(rhs->get_current_key(key2));
93+
// MergeIndexDeleteBitmapCalculatorContext::get_current_key may return a non-OK status if it encounters a
94+
// memory allocation failure; since operator() returns bool, throwing an exception is the only way to propagate the error
95+
THROW_IF_ERROR(lhs->get_current_key(key1));
96+
THROW_IF_ERROR(rhs->get_current_key(key2));
9597
if (_sequence_length == 0 && _rowid_length == 0) {
9698
auto cmp_result = key1.compare(key2);
9799
// when key1 is the same as key2,
@@ -135,28 +137,30 @@ Status MergeIndexDeleteBitmapCalculator::init(RowsetId rowset_id,
135137
std::vector<SegmentSharedPtr> const& segments,
136138
size_t seq_col_length, size_t rowdid_length,
137139
size_t max_batch_size) {
138-
_rowset_id = rowset_id;
139-
_seq_col_length = seq_col_length;
140-
_rowid_length = rowdid_length;
141-
_comparator =
142-
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length, _rowid_length);
143-
_contexts.reserve(segments.size());
144-
_heap = std::make_unique<Heap>(_comparator);
140+
RETURN_IF_CATCH_EXCEPTION({
141+
_rowset_id = rowset_id;
142+
_seq_col_length = seq_col_length;
143+
_rowid_length = rowdid_length;
144+
_comparator =
145+
MergeIndexDeleteBitmapCalculatorContext::Comparator(seq_col_length, _rowid_length);
146+
_contexts.reserve(segments.size());
147+
_heap = std::make_unique<Heap>(_comparator);
145148

146-
for (auto& segment : segments) {
147-
RETURN_IF_ERROR(segment->load_index());
148-
auto pk_idx = segment->get_primary_key_index();
149-
std::unique_ptr<segment_v2::IndexedColumnIterator> index;
150-
RETURN_IF_ERROR(pk_idx->new_iterator(&index));
151-
auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
152-
pk_idx->type_info()->type(), 1, 0);
153-
_contexts.emplace_back(std::move(index), index_type, segment->id(), pk_idx->num_rows());
154-
_heap->push(&_contexts.back());
155-
}
156-
if (_rowid_length > 0) {
157-
_rowid_coder = get_key_coder(
158-
get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT>()->type());
159-
}
149+
for (auto& segment : segments) {
150+
RETURN_IF_ERROR(segment->load_index());
151+
auto pk_idx = segment->get_primary_key_index();
152+
std::unique_ptr<segment_v2::IndexedColumnIterator> index;
153+
RETURN_IF_ERROR(pk_idx->new_iterator(&index));
154+
auto index_type = vectorized::DataTypeFactory::instance().create_data_type(
155+
pk_idx->type_info()->type(), 1, 0);
156+
_contexts.emplace_back(std::move(index), index_type, segment->id(), pk_idx->num_rows());
157+
_heap->push(&_contexts.back());
158+
}
159+
if (_rowid_length > 0) {
160+
_rowid_coder = get_key_coder(
161+
get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT>()->type());
162+
}
163+
});
160164
return Status::OK();
161165
}
162166

@@ -209,16 +213,18 @@ Status MergeIndexDeleteBitmapCalculator::calculate_one(RowLocation& loc) {
209213
}
210214

211215
Status MergeIndexDeleteBitmapCalculator::calculate_all(DeleteBitmapPtr delete_bitmap) {
212-
RowLocation loc;
213-
while (true) {
214-
auto st = calculate_one(loc);
215-
if (st.is<ErrorCode::END_OF_FILE>()) {
216-
break;
216+
RETURN_IF_CATCH_EXCEPTION({
217+
RowLocation loc;
218+
while (true) {
219+
auto st = calculate_one(loc);
220+
if (st.is<ErrorCode::END_OF_FILE>()) {
221+
break;
222+
}
223+
RETURN_IF_ERROR(st);
224+
delete_bitmap->add({_rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON},
225+
loc.row_id);
217226
}
218-
RETURN_IF_ERROR(st);
219-
delete_bitmap->add({_rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON},
220-
loc.row_id);
221-
}
227+
});
222228
return Status::OK();
223229
}
224230

0 commit comments

Comments
 (0)