Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](cloud-mow) MS should delete the existing keys before rewriting it when processing old version delete bitmap on cu compaction #43479

Merged
merged 1 commit into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 17 additions & 28 deletions be/src/cloud/cloud_cumulative_compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,12 +353,12 @@ Status CloudCumulativeCompaction::modify_rowsets() {
}
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
_tablet->enable_unique_key_merge_on_write() && _input_rowsets.size() != 1) {
process_old_version_delete_bitmap();
RETURN_IF_ERROR(process_old_version_delete_bitmap());
}
return Status::OK();
}

void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
Status CloudCumulativeCompaction::process_old_version_delete_bitmap() {
// agg previously rowset old version delete bitmap
std::vector<RowsetSharedPtr> pre_rowsets {};
std::vector<std::string> pre_rowset_ids {};
Expand Down Expand Up @@ -397,40 +397,29 @@ void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
}
if (!new_delete_bitmap->empty()) {
// store agg delete bitmap
Status update_st;
DBUG_EXECUTE_IF("CloudCumulativeCompaction.modify_rowsets.update_delete_bitmap_failed",
{
update_st = Status::InternalError(
return Status::InternalError(
"test fail to update delete bitmap for tablet_id {}",
cloud_tablet()->tablet_id());
});
if (update_st.ok()) {
update_st = _engine.meta_mgr().update_delete_bitmap_without_lock(
*cloud_tablet(), new_delete_bitmap.get());
}
if (!update_st.ok()) {
std::stringstream ss;
ss << "failed to update delete bitmap for tablet=" << cloud_tablet()->tablet_id()
<< " st=" << update_st.to_string();
std::string msg = ss.str();
LOG(WARNING) << msg;
} else {
Version version(_input_rowsets.front()->start_version(),
_input_rowsets.back()->end_version());
for (auto it = new_delete_bitmap->delete_bitmap.begin();
it != new_delete_bitmap->delete_bitmap.end(); it++) {
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
}
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
to_remove_vec);
DBUG_EXECUTE_IF(
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", {
static_cast<CloudTablet*>(_tablet.get())
->delete_expired_stale_rowsets();
});
RETURN_IF_ERROR(_engine.meta_mgr().cloud_update_delete_bitmap_without_lock(
*cloud_tablet(), new_delete_bitmap.get()));

Version version(_input_rowsets.front()->start_version(),
_input_rowsets.back()->end_version());
for (auto it = new_delete_bitmap->delete_bitmap.begin();
it != new_delete_bitmap->delete_bitmap.end(); it++) {
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
}
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
to_remove_vec);
DBUG_EXECUTE_IF(
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets",
{ static_cast<CloudTablet*>(_tablet.get())->delete_expired_stale_rowsets(); });
}
}
return Status::OK();
}

void CloudCumulativeCompaction::garbage_collection() {
Expand Down
2 changes: 1 addition & 1 deletion be/src/cloud/cloud_cumulative_compaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class CloudCumulativeCompaction : public CloudCompactionMixin {

void update_cumulative_point();

void process_old_version_delete_bitmap();
Status process_old_version_delete_bitmap();

ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; }

Expand Down
12 changes: 7 additions & 5 deletions be/src/cloud/cloud_meta_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,9 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_
for (size_t i = 0; i < rowset_ids.size(); i++) {
RowsetId rst_id;
rst_id.init(rowset_ids[i]);
delete_bitmap->merge({rst_id, segment_ids[i], vers[i]},
roaring::Roaring::read(delete_bitmaps[i].data()));
delete_bitmap->merge(
{rst_id, segment_ids[i], vers[i]},
roaring::Roaring::readSafe(delete_bitmaps[i].data(), delete_bitmaps[i].length()));
}
int64_t latency = cntl.latency_us();
if (latency > 100 * 1000) { // 100ms
Expand Down Expand Up @@ -1058,9 +1059,10 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc
return st;
}

Status CloudMetaMgr::update_delete_bitmap_without_lock(const CloudTablet& tablet,
DeleteBitmap* delete_bitmap) {
VLOG_DEBUG << "update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id();
Status CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
DeleteBitmap* delete_bitmap) {
LOG(INFO) << "cloud_update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id()
<< ",delete_bitmap size:" << delete_bitmap->delete_bitmap.size();
UpdateDeleteBitmapRequest req;
UpdateDeleteBitmapResponse res;
req.set_cloud_unique_id(config::cloud_unique_id);
Expand Down
4 changes: 2 additions & 2 deletions be/src/cloud/cloud_meta_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ class CloudMetaMgr {
Status update_delete_bitmap(const CloudTablet& tablet, int64_t lock_id, int64_t initiator,
DeleteBitmap* delete_bitmap);

Status update_delete_bitmap_without_lock(const CloudTablet& tablet,
DeleteBitmap* delete_bitmap);
Status cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
DeleteBitmap* delete_bitmap);

Status get_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id,
int64_t initiator);
Expand Down
13 changes: 12 additions & 1 deletion cloud/src/meta-service/meta_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
// lock_id > 0 : load
// lock_id = -1 : compaction
// lock_id = -2 : schema change
// lock_id = -3 : compaction update delete bitmap without lock
if (request->lock_id() > 0) {
std::string pending_val;
if (!delete_bitmap_keys.SerializeToString(&pending_val)) {
Expand All @@ -1794,6 +1795,15 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
fdb_txn_size = fdb_txn_size + pending_key.size() + pending_val.size();
LOG(INFO) << "xxx update delete bitmap put pending_key=" << hex(pending_key)
<< " lock_id=" << request->lock_id() << " value_size: " << pending_val.size();
} else if (request->lock_id() == -3) {
// delete existing key
for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
auto& start_key = delete_bitmap_keys.delete_bitmap_keys(i);
std::string end_key {start_key};
encode_int64(INT64_MAX, &end_key);
txn->remove(start_key, end_key);
LOG(INFO) << "xxx remove existing key=" << hex(start_key) << " tablet_id=" << tablet_id;
}
}

// 4. Update delete bitmap for curent txn
Expand Down Expand Up @@ -1838,7 +1848,8 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
total_key++;
total_size += key.size() + val.size();
VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << hex(key)
<< " lock_id=" << request->lock_id() << " value_size: " << val.size();
<< " lock_id=" << request->lock_id() << " key_size: " << key.size()
<< " value_size: " << val.size();
}

err = txn->commit();
Expand Down
88 changes: 88 additions & 0 deletions cloud/test/meta_service_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4766,6 +4766,94 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) {
ASSERT_EQ(get_delete_bitmap_res.versions(100), 3);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(100), "abcd4");
}

// update existing delete bitmap key
{
//first update new key
UpdateDeleteBitmapRequest update_delete_bitmap_req;
UpdateDeleteBitmapResponse update_delete_bitmap_res;
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
update_delete_bitmap_req.set_table_id(112);
update_delete_bitmap_req.set_partition_id(123);
update_delete_bitmap_req.set_lock_id(888);
update_delete_bitmap_req.set_initiator(-1);
update_delete_bitmap_req.set_tablet_id(333);
std::string large_value = generate_random_string(300 * 1000 * 3);
update_delete_bitmap_req.add_rowset_ids("456");
update_delete_bitmap_req.add_segment_ids(0);
update_delete_bitmap_req.add_versions(2);
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
meta_service->update_delete_bitmap(
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);

GetDeleteBitmapRequest get_delete_bitmap_req;
GetDeleteBitmapResponse get_delete_bitmap_res;
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
get_delete_bitmap_req.set_tablet_id(333);

get_delete_bitmap_req.add_rowset_ids("456");
get_delete_bitmap_req.add_begin_versions(2);
get_delete_bitmap_req.add_end_versions(2);

meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);

ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
}

{
//compaction update delete bitmap without lock
UpdateDeleteBitmapRequest update_delete_bitmap_req;
UpdateDeleteBitmapResponse update_delete_bitmap_res;
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
update_delete_bitmap_req.set_table_id(112);
update_delete_bitmap_req.set_partition_id(123);
update_delete_bitmap_req.set_unlock(true);
update_delete_bitmap_req.set_lock_id(-3);
update_delete_bitmap_req.set_initiator(-1);
update_delete_bitmap_req.set_tablet_id(333);
std::string large_value = generate_random_string(300 * 1000);
update_delete_bitmap_req.add_rowset_ids("456");
update_delete_bitmap_req.add_segment_ids(0);
update_delete_bitmap_req.add_versions(2);
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
meta_service->update_delete_bitmap(
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);

GetDeleteBitmapRequest get_delete_bitmap_req;
GetDeleteBitmapResponse get_delete_bitmap_res;
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
get_delete_bitmap_req.set_tablet_id(333);

get_delete_bitmap_req.add_rowset_ids("456");
get_delete_bitmap_req.add_begin_versions(2);
get_delete_bitmap_req.add_end_versions(2);

meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);

ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
}
}

TEST(MetaServiceTest, UpdateDeleteBitmapWithException) {
Expand Down
Loading