From 29ac1e7ee4cac43de85b1943750a68cd4b24076b Mon Sep 17 00:00:00 2001 From: huanghaibin Date: Fri, 8 Nov 2024 10:04:07 +0800 Subject: [PATCH] [fix](cloud-mow) MS should delete the existing keys before rewriting it when processing old version delete bitmap on cu compaction --- be/src/cloud/cloud_cumulative_compaction.cpp | 45 ++++------ be/src/cloud/cloud_cumulative_compaction.h | 2 +- be/src/cloud/cloud_meta_mgr.cpp | 12 +-- be/src/cloud/cloud_meta_mgr.h | 4 +- cloud/src/meta-service/meta_service.cpp | 13 ++- cloud/test/meta_service_test.cpp | 88 ++++++++++++++++++++ 6 files changed, 127 insertions(+), 37 deletions(-) diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp b/be/src/cloud/cloud_cumulative_compaction.cpp index b4ad6cf0d28ee2..f9965de3f64bd8 100644 --- a/be/src/cloud/cloud_cumulative_compaction.cpp +++ b/be/src/cloud/cloud_cumulative_compaction.cpp @@ -353,12 +353,12 @@ Status CloudCumulativeCompaction::modify_rowsets() { } if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write() && _input_rowsets.size() != 1) { - process_old_version_delete_bitmap(); + RETURN_IF_ERROR(process_old_version_delete_bitmap()); } return Status::OK(); } -void CloudCumulativeCompaction::process_old_version_delete_bitmap() { +Status CloudCumulativeCompaction::process_old_version_delete_bitmap() { // agg previously rowset old version delete bitmap std::vector pre_rowsets {}; std::vector pre_rowset_ids {}; @@ -397,40 +397,29 @@ void CloudCumulativeCompaction::process_old_version_delete_bitmap() { } if (!new_delete_bitmap->empty()) { // store agg delete bitmap - Status update_st; DBUG_EXECUTE_IF("CloudCumulativeCompaction.modify_rowsets.update_delete_bitmap_failed", { - update_st = Status::InternalError( + return Status::InternalError( "test fail to update delete bitmap for tablet_id {}", cloud_tablet()->tablet_id()); }); - if (update_st.ok()) { - update_st = _engine.meta_mgr().update_delete_bitmap_without_lock( - *cloud_tablet(), new_delete_bitmap.get()); - } - if (!update_st.ok()) { - std::stringstream ss; - ss << "failed to update delete bitmap for tablet=" << cloud_tablet()->tablet_id() - << " st=" << update_st.to_string(); - std::string msg = ss.str(); - LOG(WARNING) << msg; - } else { - Version version(_input_rowsets.front()->start_version(), - _input_rowsets.back()->end_version()); - for (auto it = new_delete_bitmap->delete_bitmap.begin(); - it != new_delete_bitmap->delete_bitmap.end(); it++) { - _tablet->tablet_meta()->delete_bitmap().set(it->first, it->second); - } - _tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(), - to_remove_vec); - DBUG_EXECUTE_IF( - "CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", { - static_cast(_tablet.get()) - ->delete_expired_stale_rowsets(); - }); + RETURN_IF_ERROR(_engine.meta_mgr().cloud_update_delete_bitmap_without_lock( + *cloud_tablet(), new_delete_bitmap.get())); + + Version version(_input_rowsets.front()->start_version(), + _input_rowsets.back()->end_version()); + for (auto it = new_delete_bitmap->delete_bitmap.begin(); + it != new_delete_bitmap->delete_bitmap.end(); it++) { + _tablet->tablet_meta()->delete_bitmap().set(it->first, it->second); } + _tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(), + to_remove_vec); + DBUG_EXECUTE_IF( + "CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", + { static_cast(_tablet.get())->delete_expired_stale_rowsets(); }); } } + return Status::OK(); } void CloudCumulativeCompaction::garbage_collection() { diff --git a/be/src/cloud/cloud_cumulative_compaction.h b/be/src/cloud/cloud_cumulative_compaction.h index 62c7cb44ea5bf5..1159dcb59ceef1 100644 --- a/be/src/cloud/cloud_cumulative_compaction.h +++ b/be/src/cloud/cloud_cumulative_compaction.h @@ -47,7 +47,7 @@ class CloudCumulativeCompaction : public CloudCompactionMixin { void update_cumulative_point(); - void process_old_version_delete_bitmap(); + Status process_old_version_delete_bitmap(); ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; } diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index ad7d06c2e639c8..dced2f5f1a7417 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -705,8 +705,9 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_ for (size_t i = 0; i < rowset_ids.size(); i++) { RowsetId rst_id; rst_id.init(rowset_ids[i]); - delete_bitmap->merge({rst_id, segment_ids[i], vers[i]}, - roaring::Roaring::read(delete_bitmaps[i].data())); + delete_bitmap->merge( + {rst_id, segment_ids[i], vers[i]}, + roaring::Roaring::readSafe(delete_bitmaps[i].data(), delete_bitmaps[i].length())); } int64_t latency = cntl.latency_us(); if (latency > 100 * 1000) { // 100ms @@ -1058,9 +1059,10 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc return st; } -Status CloudMetaMgr::update_delete_bitmap_without_lock(const CloudTablet& tablet, - DeleteBitmap* delete_bitmap) { - VLOG_DEBUG << "update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id(); +Status CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet, + DeleteBitmap* delete_bitmap) { + LOG(INFO) << "cloud_update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id() + << ",delete_bitmap size:" << delete_bitmap->delete_bitmap.size(); UpdateDeleteBitmapRequest req; UpdateDeleteBitmapResponse res; req.set_cloud_unique_id(config::cloud_unique_id); diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index 0134469407a855..d407b8b4ff1410 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -95,8 +95,8 @@ class CloudMetaMgr { Status update_delete_bitmap(const CloudTablet& tablet, int64_t lock_id, int64_t initiator, DeleteBitmap* delete_bitmap); - Status update_delete_bitmap_without_lock(const CloudTablet& tablet, - DeleteBitmap* delete_bitmap); + Status cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet, + DeleteBitmap* delete_bitmap); Status get_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id, int64_t initiator); diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 8212ab991b51ba..a018258020baae 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1782,6 +1782,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont // lock_id > 0 : load // lock_id = -1 : compaction // lock_id = -2 : schema change + // lock_id = -3 : compaction update delete bitmap without lock if (request->lock_id() > 0) { std::string pending_val; if (!delete_bitmap_keys.SerializeToString(&pending_val)) { @@ -1794,6 +1795,15 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont fdb_txn_size = fdb_txn_size + pending_key.size() + pending_val.size(); LOG(INFO) << "xxx update delete bitmap put pending_key=" << hex(pending_key) << " lock_id=" << request->lock_id() << " value_size: " << pending_val.size(); + } else if (request->lock_id() == -3) { + // delete existing key + for (size_t i = 0; i < request->rowset_ids_size(); ++i) { + auto& start_key = delete_bitmap_keys.delete_bitmap_keys(i); + std::string end_key {start_key}; + encode_int64(INT64_MAX, &end_key); + txn->remove(start_key, end_key); + LOG(INFO) << "xxx remove existing key=" << hex(start_key) << " tablet_id=" << tablet_id; + } } // 4. Update delete bitmap for curent txn @@ -1838,7 +1848,8 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont total_key++; total_size += key.size() + val.size(); VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << hex(key) - << " lock_id=" << request->lock_id() << " value_size: " << val.size(); + << " lock_id=" << request->lock_id() << " key_size: " << key.size() + << " value_size: " << val.size(); } err = txn->commit(); diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index 3baec482710bc4..ea52c23c467456 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -4766,6 +4766,94 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) { ASSERT_EQ(get_delete_bitmap_res.versions(100), 3); ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(100), "abcd4"); } + + // update existing delete bitmap key + { + //first update new key + UpdateDeleteBitmapRequest update_delete_bitmap_req; + UpdateDeleteBitmapResponse update_delete_bitmap_res; + update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); + update_delete_bitmap_req.set_table_id(112); + update_delete_bitmap_req.set_partition_id(123); + update_delete_bitmap_req.set_lock_id(888); + update_delete_bitmap_req.set_initiator(-1); + update_delete_bitmap_req.set_tablet_id(333); + std::string large_value = generate_random_string(300 * 1000 * 3); + update_delete_bitmap_req.add_rowset_ids("456"); + update_delete_bitmap_req.add_segment_ids(0); + update_delete_bitmap_req.add_versions(2); + update_delete_bitmap_req.add_segment_delete_bitmaps(large_value); + meta_service->update_delete_bitmap( + reinterpret_cast(&cntl), + &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); + ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK); + + GetDeleteBitmapRequest get_delete_bitmap_req; + GetDeleteBitmapResponse get_delete_bitmap_res; + get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); + get_delete_bitmap_req.set_tablet_id(333); + + get_delete_bitmap_req.add_rowset_ids("456"); + get_delete_bitmap_req.add_begin_versions(2); + get_delete_bitmap_req.add_end_versions(2); + + meta_service->get_delete_bitmap(reinterpret_cast(&cntl), + &get_delete_bitmap_req, &get_delete_bitmap_res, nullptr); + ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK); + ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1); + + ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456"); + ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0); + ASSERT_EQ(get_delete_bitmap_res.versions(0), 2); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value); + } + + { + //compaction update delete bitmap without lock + UpdateDeleteBitmapRequest update_delete_bitmap_req; + UpdateDeleteBitmapResponse update_delete_bitmap_res; + update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); + update_delete_bitmap_req.set_table_id(112); + update_delete_bitmap_req.set_partition_id(123); + update_delete_bitmap_req.set_unlock(true); + update_delete_bitmap_req.set_lock_id(-3); + update_delete_bitmap_req.set_initiator(-1); + update_delete_bitmap_req.set_tablet_id(333); + std::string large_value = generate_random_string(300 * 1000); + update_delete_bitmap_req.add_rowset_ids("456"); + update_delete_bitmap_req.add_segment_ids(0); + update_delete_bitmap_req.add_versions(2); + update_delete_bitmap_req.add_segment_delete_bitmaps(large_value); + meta_service->update_delete_bitmap( + reinterpret_cast(&cntl), + &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); + ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK); + + GetDeleteBitmapRequest get_delete_bitmap_req; + GetDeleteBitmapResponse get_delete_bitmap_res; + get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); + get_delete_bitmap_req.set_tablet_id(333); + + get_delete_bitmap_req.add_rowset_ids("456"); + get_delete_bitmap_req.add_begin_versions(2); + get_delete_bitmap_req.add_end_versions(2); + + meta_service->get_delete_bitmap(reinterpret_cast(&cntl), + &get_delete_bitmap_req, &get_delete_bitmap_res, nullptr); + ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK); + ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1); + + ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456"); + ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0); + ASSERT_EQ(get_delete_bitmap_res.versions(0), 2); + ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value); + } } TEST(MetaServiceTest, UpdateDeleteBitmapWithException) {