Skip to content

Commit 3bd553d

Browse files
authored
[fix](cloud-mow) MS should delete the existing keys before rewriting it when processing old version delete bitmap on cu compaction (#43479)
pick pr #42379
1 parent efc2ec5 commit 3bd553d

6 files changed

+127
-37
lines changed

be/src/cloud/cloud_cumulative_compaction.cpp

+17-28
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,12 @@ Status CloudCumulativeCompaction::modify_rowsets() {
353353
}
354354
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
355355
_tablet->enable_unique_key_merge_on_write() && _input_rowsets.size() != 1) {
356-
process_old_version_delete_bitmap();
356+
RETURN_IF_ERROR(process_old_version_delete_bitmap());
357357
}
358358
return Status::OK();
359359
}
360360

361-
void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
361+
Status CloudCumulativeCompaction::process_old_version_delete_bitmap() {
362362
// agg previously rowset old version delete bitmap
363363
std::vector<RowsetSharedPtr> pre_rowsets {};
364364
std::vector<std::string> pre_rowset_ids {};
@@ -397,40 +397,29 @@ void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
397397
}
398398
if (!new_delete_bitmap->empty()) {
399399
// store agg delete bitmap
400-
Status update_st;
401400
DBUG_EXECUTE_IF("CloudCumulativeCompaction.modify_rowsets.update_delete_bitmap_failed",
402401
{
403-
update_st = Status::InternalError(
402+
return Status::InternalError(
404403
"test fail to update delete bitmap for tablet_id {}",
405404
cloud_tablet()->tablet_id());
406405
});
407-
if (update_st.ok()) {
408-
update_st = _engine.meta_mgr().update_delete_bitmap_without_lock(
409-
*cloud_tablet(), new_delete_bitmap.get());
410-
}
411-
if (!update_st.ok()) {
412-
std::stringstream ss;
413-
ss << "failed to update delete bitmap for tablet=" << cloud_tablet()->tablet_id()
414-
<< " st=" << update_st.to_string();
415-
std::string msg = ss.str();
416-
LOG(WARNING) << msg;
417-
} else {
418-
Version version(_input_rowsets.front()->start_version(),
419-
_input_rowsets.back()->end_version());
420-
for (auto it = new_delete_bitmap->delete_bitmap.begin();
421-
it != new_delete_bitmap->delete_bitmap.end(); it++) {
422-
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
423-
}
424-
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
425-
to_remove_vec);
426-
DBUG_EXECUTE_IF(
427-
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", {
428-
static_cast<CloudTablet*>(_tablet.get())
429-
->delete_expired_stale_rowsets();
430-
});
406+
RETURN_IF_ERROR(_engine.meta_mgr().cloud_update_delete_bitmap_without_lock(
407+
*cloud_tablet(), new_delete_bitmap.get()));
408+
409+
Version version(_input_rowsets.front()->start_version(),
410+
_input_rowsets.back()->end_version());
411+
for (auto it = new_delete_bitmap->delete_bitmap.begin();
412+
it != new_delete_bitmap->delete_bitmap.end(); it++) {
413+
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
431414
}
415+
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
416+
to_remove_vec);
417+
DBUG_EXECUTE_IF(
418+
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets",
419+
{ static_cast<CloudTablet*>(_tablet.get())->delete_expired_stale_rowsets(); });
432420
}
433421
}
422+
return Status::OK();
434423
}
435424

436425
void CloudCumulativeCompaction::garbage_collection() {

be/src/cloud/cloud_cumulative_compaction.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class CloudCumulativeCompaction : public CloudCompactionMixin {
4747

4848
void update_cumulative_point();
4949

50-
void process_old_version_delete_bitmap();
50+
Status process_old_version_delete_bitmap();
5151

5252
ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; }
5353

be/src/cloud/cloud_meta_mgr.cpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -712,8 +712,9 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_
712712
for (size_t i = 0; i < rowset_ids.size(); i++) {
713713
RowsetId rst_id;
714714
rst_id.init(rowset_ids[i]);
715-
delete_bitmap->merge({rst_id, segment_ids[i], vers[i]},
716-
roaring::Roaring::read(delete_bitmaps[i].data()));
715+
delete_bitmap->merge(
716+
{rst_id, segment_ids[i], vers[i]},
717+
roaring::Roaring::readSafe(delete_bitmaps[i].data(), delete_bitmaps[i].length()));
717718
}
718719
int64_t latency = cntl.latency_us();
719720
if (latency > 100 * 1000) { // 100ms
@@ -1065,9 +1066,10 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc
10651066
return st;
10661067
}
10671068

1068-
Status CloudMetaMgr::update_delete_bitmap_without_lock(const CloudTablet& tablet,
1069-
DeleteBitmap* delete_bitmap) {
1070-
VLOG_DEBUG << "update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id();
1069+
Status CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
1070+
DeleteBitmap* delete_bitmap) {
1071+
LOG(INFO) << "cloud_update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id()
1072+
<< ",delete_bitmap size:" << delete_bitmap->delete_bitmap.size();
10711073
UpdateDeleteBitmapRequest req;
10721074
UpdateDeleteBitmapResponse res;
10731075
req.set_cloud_unique_id(config::cloud_unique_id);

be/src/cloud/cloud_meta_mgr.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ class CloudMetaMgr {
9595
Status update_delete_bitmap(const CloudTablet& tablet, int64_t lock_id, int64_t initiator,
9696
DeleteBitmap* delete_bitmap);
9797

98-
Status update_delete_bitmap_without_lock(const CloudTablet& tablet,
99-
DeleteBitmap* delete_bitmap);
98+
Status cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
99+
DeleteBitmap* delete_bitmap);
100100

101101
Status get_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id,
102102
int64_t initiator);

cloud/src/meta-service/meta_service.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
17821782
// lock_id > 0 : load
17831783
// lock_id = -1 : compaction
17841784
// lock_id = -2 : schema change
1785+
// lock_id = -3 : compaction update delete bitmap without lock
17851786
if (request->lock_id() > 0) {
17861787
std::string pending_val;
17871788
if (!delete_bitmap_keys.SerializeToString(&pending_val)) {
@@ -1794,6 +1795,15 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
17941795
fdb_txn_size = fdb_txn_size + pending_key.size() + pending_val.size();
17951796
LOG(INFO) << "xxx update delete bitmap put pending_key=" << hex(pending_key)
17961797
<< " lock_id=" << request->lock_id() << " value_size: " << pending_val.size();
1798+
} else if (request->lock_id() == -3) {
1799+
// delete existing key
1800+
for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
1801+
auto& start_key = delete_bitmap_keys.delete_bitmap_keys(i);
1802+
std::string end_key {start_key};
1803+
encode_int64(INT64_MAX, &end_key);
1804+
txn->remove(start_key, end_key);
1805+
LOG(INFO) << "xxx remove existing key=" << hex(start_key) << " tablet_id=" << tablet_id;
1806+
}
17971807
}
17981808

17991809
// 4. Update delete bitmap for curent txn
@@ -1838,7 +1848,8 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
18381848
total_key++;
18391849
total_size += key.size() + val.size();
18401850
VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << hex(key)
1841-
<< " lock_id=" << request->lock_id() << " value_size: " << val.size();
1851+
<< " lock_id=" << request->lock_id() << " key_size: " << key.size()
1852+
<< " value_size: " << val.size();
18421853
}
18431854

18441855
err = txn->commit();

cloud/test/meta_service_test.cpp

+88
Original file line numberDiff line numberDiff line change
@@ -4766,6 +4766,94 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) {
47664766
ASSERT_EQ(get_delete_bitmap_res.versions(100), 3);
47674767
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(100), "abcd4");
47684768
}
4769+
4770+
// update existing delete bitmap key
4771+
{
4772+
//first update new key
4773+
UpdateDeleteBitmapRequest update_delete_bitmap_req;
4774+
UpdateDeleteBitmapResponse update_delete_bitmap_res;
4775+
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4776+
update_delete_bitmap_req.set_table_id(112);
4777+
update_delete_bitmap_req.set_partition_id(123);
4778+
update_delete_bitmap_req.set_lock_id(888);
4779+
update_delete_bitmap_req.set_initiator(-1);
4780+
update_delete_bitmap_req.set_tablet_id(333);
4781+
std::string large_value = generate_random_string(300 * 1000 * 3);
4782+
update_delete_bitmap_req.add_rowset_ids("456");
4783+
update_delete_bitmap_req.add_segment_ids(0);
4784+
update_delete_bitmap_req.add_versions(2);
4785+
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
4786+
meta_service->update_delete_bitmap(
4787+
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4788+
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
4789+
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4790+
4791+
GetDeleteBitmapRequest get_delete_bitmap_req;
4792+
GetDeleteBitmapResponse get_delete_bitmap_res;
4793+
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4794+
get_delete_bitmap_req.set_tablet_id(333);
4795+
4796+
get_delete_bitmap_req.add_rowset_ids("456");
4797+
get_delete_bitmap_req.add_begin_versions(2);
4798+
get_delete_bitmap_req.add_end_versions(2);
4799+
4800+
meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4801+
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
4802+
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4803+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
4804+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4805+
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
4806+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4807+
4808+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
4809+
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
4810+
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
4811+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
4812+
}
4813+
4814+
{
4815+
//compaction update delete bitmap without lock
4816+
UpdateDeleteBitmapRequest update_delete_bitmap_req;
4817+
UpdateDeleteBitmapResponse update_delete_bitmap_res;
4818+
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4819+
update_delete_bitmap_req.set_table_id(112);
4820+
update_delete_bitmap_req.set_partition_id(123);
4821+
update_delete_bitmap_req.set_unlock(true);
4822+
update_delete_bitmap_req.set_lock_id(-3);
4823+
update_delete_bitmap_req.set_initiator(-1);
4824+
update_delete_bitmap_req.set_tablet_id(333);
4825+
std::string large_value = generate_random_string(300 * 1000);
4826+
update_delete_bitmap_req.add_rowset_ids("456");
4827+
update_delete_bitmap_req.add_segment_ids(0);
4828+
update_delete_bitmap_req.add_versions(2);
4829+
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
4830+
meta_service->update_delete_bitmap(
4831+
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4832+
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
4833+
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4834+
4835+
GetDeleteBitmapRequest get_delete_bitmap_req;
4836+
GetDeleteBitmapResponse get_delete_bitmap_res;
4837+
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4838+
get_delete_bitmap_req.set_tablet_id(333);
4839+
4840+
get_delete_bitmap_req.add_rowset_ids("456");
4841+
get_delete_bitmap_req.add_begin_versions(2);
4842+
get_delete_bitmap_req.add_end_versions(2);
4843+
4844+
meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4845+
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
4846+
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4847+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
4848+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4849+
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
4850+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4851+
4852+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
4853+
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
4854+
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
4855+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
4856+
}
47694857
}
47704858

47714859
TEST(MetaServiceTest, UpdateDeleteBitmapWithException) {

0 commit comments

Comments
 (0)