diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index c7a86d19905ffa..f68cd5bba99c72 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -1187,8 +1187,9 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc auto st = retry_rpc("update delete bitmap", req, &res, &MetaService_Stub::update_delete_bitmap); if (res.status().code() == MetaServiceCode::LOCK_EXPIRED) { return Status::Error( - "lock expired when update delete bitmap, tablet_id: {}, lock_id: {}", - tablet.tablet_id(), lock_id); + "lock expired when update delete bitmap, tablet_id: {}, lock_id: {}, initiator: " + "{}, error_msg: {}", + tablet.tablet_id(), lock_id, initiator, res.status().msg()); } return st; } diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 232b142d7b6bd7..eb7d5c25fb81d7 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -235,6 +235,8 @@ CONF_mInt64(max_s3_client_retry, "10"); // Max byte getting delete bitmap can return, default is 1GB CONF_mInt64(max_get_delete_bitmap_byte, "1073741824"); +// retry configs of remove_delete_bitmap_update_lock txn_conflict +CONF_Bool(delete_bitmap_enable_retry_txn_conflict, "true"); // Max byte txn commit when updating delete bitmap, default is 7MB. 
// Because the size of one fdb transaction can't exceed 10MB, and diff --git a/cloud/src/meta-service/keys.cpp b/cloud/src/meta-service/keys.cpp index a518b6e264d20d..e089e0a1f0bcaa 100644 --- a/cloud/src/meta-service/keys.cpp +++ b/cloud/src/meta-service/keys.cpp @@ -33,6 +33,7 @@ static const char* STATS_KEY_PREFIX = "stats"; static const char* JOB_KEY_PREFIX = "job"; static const char* COPY_KEY_PREFIX = "copy"; static const char* VAULT_KEY_PREFIX = "storage_vault"; +static const char* MOW_KEY_PREFIX = "mow"; // Infix static const char* TXN_KEY_INFIX_LABEL = "txn_label"; @@ -51,6 +52,7 @@ static const char* META_KEY_INFIX_SCHEMA = "schema"; static const char* META_KEY_INFIX_DELETE_BITMAP = "delete_bitmap"; static const char* META_KEY_INFIX_DELETE_BITMAP_LOCK = "delete_bitmap_lock"; static const char* META_KEY_INFIX_DELETE_BITMAP_PENDING = "delete_bitmap_pending"; +static const char* META_KEY_INFIX_MOW_TABLET_COMPACTION = "mow_tablet_comp"; static const char* META_KEY_INFIX_SCHEMA_DICTIONARY = "tablet_schema_pb_dict"; static const char* RECYCLE_KEY_INFIX_INDEX = "index"; @@ -115,7 +117,8 @@ static void encode_prefix(const T& t, std::string* key) { RecycleIndexKeyInfo, RecyclePartKeyInfo, RecycleRowsetKeyInfo, RecycleTxnKeyInfo, RecycleStageKeyInfo, StatsTabletKeyInfo, TableVersionKeyInfo, JobTabletKeyInfo, JobRecycleKeyInfo, RLJobProgressKeyInfo, - CopyJobKeyInfo, CopyFileKeyInfo, StorageVaultKeyInfo, MetaSchemaPBDictionaryInfo>); + CopyJobKeyInfo, CopyFileKeyInfo, StorageVaultKeyInfo, MetaSchemaPBDictionaryInfo, + MowTabletCompactionInfo>); key->push_back(CLOUD_USER_KEY_SPACE01); // Prefixes for key families @@ -156,6 +159,8 @@ static void encode_prefix(const T& t, std::string* key) { encode_bytes(COPY_KEY_PREFIX, key); } else if constexpr (std::is_same_v) { encode_bytes(VAULT_KEY_PREFIX, key); + } else if constexpr(std::is_same_v) { + encode_bytes(MOW_KEY_PREFIX, key); } else { // This branch mean to be unreachable, add an assert(false) here to // prevent 
missing branch match. @@ -497,6 +502,13 @@ std::string system_meta_service_encryption_key_info_key() { // Other keys //============================================================================== +void mow_tablet_compaction_key(const MowTabletCompactionInfo& in, std::string* out) { + encode_prefix(in, out); // 0x01 "mow" ${instance_id} + encode_bytes(META_KEY_INFIX_MOW_TABLET_COMPACTION, out); // "mow_tablet_comp" + encode_int64(std::get<1>(in), out); // table_id + encode_int64(std::get<2>(in), out); // initiator +} + //============================================================================== // Decode keys //============================================================================== diff --git a/cloud/src/meta-service/keys.h b/cloud/src/meta-service/keys.h index c2c9a9dd4daa15..9ff51374927c7e 100644 --- a/cloud/src/meta-service/keys.h +++ b/cloud/src/meta-service/keys.h @@ -190,7 +190,8 @@ using StorageVaultKeyInfo = BasicKeyInfo<26, std::tuple>; // 0:instance_id 1:index_id using MetaSchemaPBDictionaryInfo = BasicKeyInfo<28 , std::tuple>; - +// 0:instance_id 1:table_id 2:initiator +using MowTabletCompactionInfo = BasicKeyInfo<29 , std::tuple>; void instance_key(const InstanceKeyInfo& in, std::string* out); static inline std::string instance_key(const InstanceKeyInfo& in) { std::string s; instance_key(in, &s); return s; } @@ -224,6 +225,7 @@ void meta_delete_bitmap_key(const MetaDeleteBitmapInfo& in, std::string* out); void meta_delete_bitmap_update_lock_key(const MetaDeleteBitmapUpdateLockInfo& in, std::string* out); void meta_pending_delete_bitmap_key(const MetaPendingDeleteBitmapInfo& in, std::string* out); void meta_schema_pb_dictionary_key(const MetaSchemaPBDictionaryInfo& in, std::string* out); +void mow_tablet_compaction_key(const MowTabletCompactionInfo& in, std::string* out); static inline std::string meta_rowset_key(const MetaRowsetKeyInfo& in) { std::string s; meta_rowset_key(in, &s); return s; } static inline std::string 
meta_rowset_tmp_key(const MetaRowsetTmpKeyInfo& in) { std::string s; meta_rowset_tmp_key(in, &s); return s; } static inline std::string meta_tablet_idx_key(const MetaTabletIdxKeyInfo& in) { std::string s; meta_tablet_idx_key(in, &s); return s; } @@ -233,6 +235,7 @@ static inline std::string meta_delete_bitmap_key(const MetaDeleteBitmapInfo& in) static inline std::string meta_delete_bitmap_update_lock_key(const MetaDeleteBitmapUpdateLockInfo& in) { std::string s; meta_delete_bitmap_update_lock_key(in, &s); return s; } static inline std::string meta_pending_delete_bitmap_key(const MetaPendingDeleteBitmapInfo& in) { std::string s; meta_pending_delete_bitmap_key(in, &s); return s; } static inline std::string meta_schema_pb_dictionary_key(const MetaSchemaPBDictionaryInfo& in) { std::string s; meta_schema_pb_dictionary_key(in, &s); return s; } +static inline std::string mow_tablet_compaction_key(const MowTabletCompactionInfo& in) { std::string s; mow_tablet_compaction_key(in, &s); return s; } std::string recycle_key_prefix(std::string_view instance_id); void recycle_index_key(const RecycleIndexKeyInfo& in, std::string* out); diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 6ceee1809392e6..9739a214b2afdd 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1747,11 +1747,13 @@ void MetaServiceImpl::get_tablet_stats(::google::protobuf::RpcController* contro } static bool check_delete_bitmap_lock(MetaServiceCode& code, std::string& msg, std::stringstream& ss, - std::unique_ptr& txn, int64_t table_id, - int64_t lock_id, int64_t lock_initiator, std::string& lock_key, - DeleteBitmapUpdateLockPB& lock_info) { + std::unique_ptr& txn, std::string& instance_id, + int64_t table_id, int64_t lock_id, int64_t lock_initiator, + std::string& lock_key, DeleteBitmapUpdateLockPB& lock_info, + std::string log = "") { std::string lock_val; - LOG(INFO) << "check_delete_bitmap_lock, table_id=" 
<< table_id << " key=" << hex(lock_key); + LOG(INFO) << "check_delete_bitmap_lock, table_id=" << table_id << " lock_id=" << lock_id + << " initiator=" << lock_initiator << " key=" << hex(lock_key) << log; auto err = txn->get(lock_key, &lock_val); TEST_SYNC_POINT_CALLBACK("check_delete_bitmap_lock.inject_get_lock_key_err", &err); if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { @@ -1777,17 +1779,39 @@ static bool check_delete_bitmap_lock(MetaServiceCode& code, std::string& msg, st code = MetaServiceCode::LOCK_EXPIRED; return false; } - bool found = false; - for (auto initiator : lock_info.initiators()) { - if (lock_initiator == initiator) { - found = true; - break; + if (lock_id == COMPACTION_DELETE_BITMAP_LOCK_ID) { + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, table_id, lock_initiator}); + std::string tablet_compaction_val; + err = txn->get(tablet_compaction_key, &tablet_compaction_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + ss << "tablet compaction key not found, table_id=" << table_id << " lock_id=" << lock_id + << " initiator=" << lock_initiator; + msg = ss.str(); + code = MetaServiceCode::LOCK_EXPIRED; + return false; + } + if (err != TxnErrorCode::TXN_OK) { + ss << "failed to get tablet compaction info, err=" << err; + msg = ss.str(); + code = cast_as(err); + return false; + } + // the expiration time is not checked here + return true; + } else { + bool found = false; + for (auto initiator : lock_info.initiators()) { + if (lock_initiator == initiator) { + found = true; + break; + } + } + if (!found) { + msg = "lock initiator not exist"; + code = MetaServiceCode::LOCK_EXPIRED; + return false; } - } - if (!found) { - msg = "lock initiator not exist"; - code = MetaServiceCode::LOCK_EXPIRED; - return false; } return true; } @@ -1863,12 +1887,13 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont } bool unlock = request->has_unlock() ?
request->unlock() : false; + std::string log = ", update delete bitmap for tablet " + std::to_string(tablet_id); if (!unlock) { // 1. Check whether the lock expires std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); DeleteBitmapUpdateLockPB lock_info; - if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id, request->lock_id(), - request->initiator(), lock_key, lock_info)) { + if (!check_delete_bitmap_lock(code, msg, ss, txn, instance_id, table_id, request->lock_id(), + request->initiator(), lock_key, lock_info, log)) { LOG(WARNING) << "failed to check delete bitmap lock, table_id=" << table_id << " request lock_id=" << request->lock_id() << " request initiator=" << request->initiator() << " msg " << msg; @@ -1966,8 +1991,9 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); DeleteBitmapUpdateLockPB lock_info; - if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id, request->lock_id(), - request->initiator(), lock_key, lock_info)) { + if (!check_delete_bitmap_lock(code, msg, ss, txn, instance_id, table_id, + request->lock_id(), request->initiator(), lock_key, + lock_info, log)) { LOG(WARNING) << "failed to check delete bitmap lock, table_id=" << table_id << " request lock_id=" << request->lock_id() << " request initiator=" << request->initiator() << " msg " << msg; @@ -2196,6 +2222,49 @@ void MetaServiceImpl::get_delete_bitmap(google::protobuf::RpcController* control } } +static bool put_mow_tablet_compaction_key(MetaServiceCode& code, std::string& msg, + std::unique_ptr& txn, + std::string& instance_id, int64_t table_id, + int64_t lock_id, int64_t initiator, int64_t expiration, + std::string& current_lock_msg) { + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, table_id, initiator}); + std::string tablet_compaction_val; + MowTabletCompactionPB mow_tablet_compaction; + 
mow_tablet_compaction.set_expiration(expiration); + mow_tablet_compaction.SerializeToString(&tablet_compaction_val); + if (tablet_compaction_val.empty()) { + code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; + msg = "MowTabletCompactionPB serialization error"; + return false; + } + txn->put(tablet_compaction_key, tablet_compaction_val); + LOG(INFO) << "xxx put tablet compaction key=" << hex(tablet_compaction_key) + << " table_id=" << table_id << " lock_id=" << lock_id << " initiator=" << initiator + << " expiration=" << expiration << ", " << current_lock_msg; + return true; +} + +static bool put_delete_bitmap_update_lock_key(MetaServiceCode& code, std::string& msg, + std::unique_ptr& txn, int64_t table_id, + int64_t lock_id, int64_t initiator, + std::string& lock_key, + DeleteBitmapUpdateLockPB& lock_info, + std::string& current_lock_msg) { + std::string lock_val; + lock_info.SerializeToString(&lock_val); + if (lock_val.empty()) { + code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; + msg = "DeleteBitmapUpdateLockPB serialization error"; + return false; + } + txn->put(lock_key, lock_val); + LOG(INFO) << "xxx put lock_key=" << hex(lock_key) << " table_id=" << table_id + << " lock_id=" << lock_id << " initiator=" << initiator + << " initiators_size=" << lock_info.initiators_size() << ", " << current_lock_msg; + return true; +} + void MetaServiceImpl::get_delete_bitmap_update_lock(google::protobuf::RpcController* controller, const GetDeleteBitmapUpdateLockRequest* request, GetDeleteBitmapUpdateLockResponse* response, @@ -2217,77 +2286,225 @@ void MetaServiceImpl::get_delete_bitmap_update_lock(google::protobuf::RpcControl } RPC_RATE_LIMIT(get_delete_bitmap_update_lock) - std::unique_ptr txn; - TxnErrorCode err = txn_kv_->create_txn(&txn); - if (err != TxnErrorCode::TXN_OK) { - code = cast_as(err); - msg = "failed to init txn"; - return; - } auto table_id = request->table_id(); std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); - 
std::string lock_val; - DeleteBitmapUpdateLockPB lock_info; - err = txn->get(lock_key, &lock_val); - if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { - ss << "failed to get delete bitmap update lock, instance_id=" << instance_id - << " table_id=" << table_id << " key=" << hex(lock_key) << " err=" << err; - msg = ss.str(); - code = MetaServiceCode::KV_TXN_GET_ERR; - return; - } - using namespace std::chrono; - int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); - if (err == TxnErrorCode::TXN_OK) { - if (!lock_info.ParseFromString(lock_val)) [[unlikely]] { - code = MetaServiceCode::PROTOBUF_PARSE_ERR; - msg = "failed to parse DeleteBitmapUpdateLockPB"; + bool first_retry = true; + int64_t retry = 0; + while (retry <= 1) { + retry++; + response->Clear(); + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + code = cast_as(err); + msg = "failed to init txn"; return; } - if (lock_info.expiration() > 0 && lock_info.expiration() < now) { - LOG(INFO) << "delete bitmap lock expired, continue to process. lock_id=" - << lock_info.lock_id() << " table_id=" << table_id << " now=" << now; - lock_info.clear_initiators(); - } else if (lock_info.lock_id() != request->lock_id()) { - ss << "already be locked. 
request lock_id=" << request->lock_id() - << " locked by lock_id=" << lock_info.lock_id() << " table_id=" << table_id - << " now=" << now << " expiration=" << lock_info.expiration(); + std::string lock_val; + DeleteBitmapUpdateLockPB lock_info; + err = txn->get(lock_key, &lock_val); + if (err != TxnErrorCode::TXN_OK && err != TxnErrorCode::TXN_KEY_NOT_FOUND) { + ss << "failed to get delete bitmap update lock, instance_id=" << instance_id + << " table_id=" << table_id << " key=" << hex(lock_key) << " err=" << err; msg = ss.str(); - code = MetaServiceCode::LOCK_CONFLICT; + code = MetaServiceCode::KV_TXN_GET_ERR; return; } - } + using namespace std::chrono; + int64_t now = duration_cast(system_clock::now().time_since_epoch()).count(); + int64_t expiration = now + request->expiration(); + bool lock_key_not_found = false; + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + lock_key_not_found = true; + std::string current_lock_msg = "lock key not found"; + lock_info.set_lock_id(request->lock_id()); + // compaction does not use this expiration, only used when upgrade ms + lock_info.set_expiration(expiration); + if (request->lock_id() != COMPACTION_DELETE_BITMAP_LOCK_ID) { + lock_info.add_initiators(request->initiator()); + } else { + // in normal case, this should remove 0 kvs + // but when upgrade ms, if there are ms with old and new versions, it works + std::string tablet_compaction_key_begin = + mow_tablet_compaction_key({instance_id, table_id, 0}); + std::string tablet_compaction_key_end = + mow_tablet_compaction_key({instance_id, table_id, INT64_MAX}); + txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); + LOG(INFO) << "remove mow tablet compaction kv, begin=" + << hex(tablet_compaction_key_begin) + << " end=" << hex(tablet_compaction_key_end) << " table_id=" << table_id; + if (!put_mow_tablet_compaction_key(code, msg, txn, instance_id, table_id, + request->lock_id(), request->initiator(), + expiration, current_lock_msg)) { + return; + } + } + if 
(!put_delete_bitmap_update_lock_key(code, msg, txn, table_id, request->lock_id(), + request->initiator(), lock_key, lock_info, + current_lock_msg)) { + return; + } + } else if (err == TxnErrorCode::TXN_OK) { + if (!lock_info.ParseFromString(lock_val)) [[unlikely]] { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + msg = "failed to parse DeleteBitmapUpdateLockPB"; + return; + } + if (lock_info.lock_id() != COMPACTION_DELETE_BITMAP_LOCK_ID) { + if (lock_info.expiration() > 0 && lock_info.expiration() < now) { + LOG(INFO) << "delete bitmap lock expired, continue to process. lock_id=" + << lock_info.lock_id() << " table_id=" << table_id + << " expiration=" << lock_info.expiration() << " now=" << now + << " initiator_size=" << lock_info.initiators_size(); + lock_info.clear_initiators(); + } else if (lock_info.lock_id() != request->lock_id()) { + ss << "already be locked by lock_id=" << lock_info.lock_id() + << " expiration=" << lock_info.expiration() << " now=" << now + << ", request lock_id=" << request->lock_id() << " table_id=" << table_id + << " initiator=" << request->initiator(); + msg = ss.str(); + code = MetaServiceCode::LOCK_CONFLICT; + return; + } + std::string current_lock_msg = + "original lock_id=" + std::to_string(lock_info.lock_id()); + lock_info.set_lock_id(request->lock_id()); + // compaction does not use the expiration, only used when upgrade ms + lock_info.set_expiration(expiration); + if (request->lock_id() != COMPACTION_DELETE_BITMAP_LOCK_ID) { + bool found = false; + for (auto initiator : lock_info.initiators()) { + if (request->initiator() == initiator) { + found = true; + break; + } + } + if (!found) { + lock_info.add_initiators(request->initiator()); + } + } else { + lock_key_not_found = true; + // in normal case, this should remove 0 kvs + // but when upgrade ms, if there are ms with old and new versions, it works + std::string tablet_compaction_key_begin = + mow_tablet_compaction_key({instance_id, table_id, 0}); + std::string 
tablet_compaction_key_end = + mow_tablet_compaction_key({instance_id, table_id, INT64_MAX}); + txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); + LOG(INFO) << "remove mow tablet compaction kv, begin=" + << hex(tablet_compaction_key_begin) + << " end=" << hex(tablet_compaction_key_end) + << " table_id=" << table_id; + if (!put_mow_tablet_compaction_key(code, msg, txn, instance_id, table_id, + request->lock_id(), request->initiator(), + expiration, current_lock_msg)) { + return; + } + } + if (!put_delete_bitmap_update_lock_key(code, msg, txn, table_id, request->lock_id(), + request->initiator(), lock_key, lock_info, + current_lock_msg)) { + return; + } + } else { + if (request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID) { + std::string current_lock_msg = "locked by lock_id=-1"; + if (!put_mow_tablet_compaction_key(code, msg, txn, instance_id, table_id, + request->lock_id(), request->initiator(), + expiration, current_lock_msg)) { + return; + } + } else { + // check if compaction key is expired + bool has_unexpired_compaction = false; + int64_t unexpired_expiration = 0; + std::string key0 = mow_tablet_compaction_key({instance_id, table_id, 0}); + std::string key1 = mow_tablet_compaction_key({instance_id, table_id + 1, 0}); + MowTabletCompactionPB mow_tablet_compaction; + std::unique_ptr it; + int64_t expired_compaction_num = 0; + do { + err = txn->get(key0, key1, &it); + if (err != TxnErrorCode::TXN_OK) { + code = cast_as(err); + ss << "internal error, failed to get mow tablet compaction, err=" + << err; + msg = ss.str(); + LOG(WARNING) << msg; + return; + } + + while (it->has_next() && !has_unexpired_compaction) { + auto [k, v] = it->next(); + if (!mow_tablet_compaction.ParseFromArray(v.data(), v.size())) + [[unlikely]] { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + msg = "failed to parse MowTabletCompactionPB"; + return; + } + if (mow_tablet_compaction.expiration() > 0 && + mow_tablet_compaction.expiration() < now) { + LOG(INFO) << "remove 
mow tablet compaction lock. table_id=" + << table_id << " lock_id=" << lock_info.lock_id() + << " expiration=" << mow_tablet_compaction.expiration() + << " now=" << now << " key=" << hex(k); + txn->remove(k); + expired_compaction_num++; + } else { + has_unexpired_compaction = true; + unexpired_expiration = mow_tablet_compaction.expiration(); + } + } + key0 = it->next_begin_key(); // Update to next smallest key for iteration + } while (it->more() && !has_unexpired_compaction); + if (has_unexpired_compaction) { + // TODO print initiator + ss << "already be locked by lock_id=" << lock_info.lock_id() + << " expiration=" << unexpired_expiration << " now=" << now + << ". request lock_id=" << request->lock_id() << " table_id=" << table_id + << " initiator=" << request->initiator(); + msg = ss.str(); + code = MetaServiceCode::LOCK_CONFLICT; + return; + } + // all compaction is expired + lock_info.set_lock_id(request->lock_id()); + lock_info.set_expiration(expiration); + lock_info.clear_initiators(); + lock_info.add_initiators(request->initiator()); + std::string current_lock_msg = + std::to_string(expired_compaction_num) + " compaction is expired"; + if (!put_delete_bitmap_update_lock_key(code, msg, txn, table_id, + request->lock_id(), request->initiator(), + lock_key, lock_info, current_lock_msg)) { + return; + } + } + } + } - lock_info.set_lock_id(request->lock_id()); - lock_info.set_expiration(now + request->expiration()); - bool found = false; - for (auto initiator : lock_info.initiators()) { - if (request->initiator() == initiator) { - found = true; + err = txn->commit(); + TEST_SYNC_POINT_CALLBACK("get_delete_bitmap_update_lock:commit:conflict", &first_retry, + &err); + if (err == TxnErrorCode::TXN_OK) { break; + } else if (err == TxnErrorCode::TXN_CONFLICT && lock_key_not_found && + request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID && + config::delete_bitmap_enable_retry_txn_conflict && first_retry) { + // if err is TXN_CONFLICT, and the lock id is -1, do a 
fast retry + LOG(INFO) << "fast retry to get_delete_bitmap_update_lock, table_id=" + << request->table_id() << " lock_id=" << request->lock_id() + << ", initiator=" << request->initiator() << ", err=" << err; + first_retry = false; + continue; + } else { + code = cast_as(err); + ss << "failed to get_delete_bitmap_update_lock, lock_id=" << request->lock_id() + << ", initiator=" << request->initiator() << ", err=" << err; + msg = ss.str(); + return; } } - if (!found) { - lock_info.add_initiators(request->initiator()); - } - lock_info.SerializeToString(&lock_val); - if (lock_val.empty()) { - code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; - msg = "pb serialization error"; - return; - } - txn->put(lock_key, lock_val); - LOG(INFO) << "xxx put lock_key=" << hex(lock_key) << " table_id=" << table_id - << " lock_id=" << request->lock_id() << " initiator=" << request->initiator() - << " initiators_size=" << lock_info.initiators_size(); - - err = txn->commit(); - if (err != TxnErrorCode::TXN_OK) { - code = cast_as(err); - ss << "failed to get_delete_bitmap_update_lock, err=" << err; - msg = ss.str(); - return; - } bool require_tablet_stats = request->has_require_compaction_stats() ?
request->require_compaction_stats() : false; @@ -2302,7 +2519,8 @@ void MetaServiceImpl::get_delete_bitmap_update_lock(google::protobuf::RpcControl // these steps can be done in different fdb txns StopWatch read_stats_sw; - err = txn_kv_->create_txn(&txn); + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); if (err != TxnErrorCode::TXN_OK) { code = cast_as(err); msg = "failed to init txn"; @@ -2316,7 +2534,7 @@ void MetaServiceImpl::get_delete_bitmap_update_lock(google::protobuf::RpcControl stats_tablet_key({instance_id, tablet_idx.table_id(), tablet_idx.index_id(), tablet_idx.partition_id(), tablet_idx.tablet_id()}); std::string stats_val; - TxnErrorCode err = txn->get(stats_key, &stats_val); + err = txn->get(stats_key, &stats_val); TEST_SYNC_POINT_CALLBACK("get_delete_bitmap_update_lock.get_compaction_cnts_inject_error", &err); if (err == TxnErrorCode::TXN_TOO_OLD) { @@ -2377,7 +2595,7 @@ void MetaServiceImpl::get_delete_bitmap_update_lock(google::protobuf::RpcControl request->tablet_indexes().size(), read_stats_sw.elapsed_us() / 1000); DeleteBitmapUpdateLockPB lock_info_tmp; - if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id, request->lock_id(), + if (!check_delete_bitmap_lock(code, msg, ss, txn, instance_id, table_id, request->lock_id(), request->initiator(), lock_key, lock_info_tmp)) { LOG(WARNING) << "failed to check delete bitmap lock after get tablet stats and tablet " "states, table_id=" @@ -2415,47 +2633,58 @@ void MetaServiceImpl::remove_delete_bitmap_update_lock( msg = "failed to init txn"; return; } - std::string lock_key = - meta_delete_bitmap_update_lock_key({instance_id, request->table_id(), -1}); - std::string lock_val; - DeleteBitmapUpdateLockPB lock_info; - if (!check_delete_bitmap_lock(code, msg, ss, txn, request->table_id(), request->lock_id(), - request->initiator(), lock_key, lock_info)) { - LOG(WARNING) << "failed to check delete bitmap tablet lock" - << " table_id=" << request->table_id() << " tablet_id=" << 
request->tablet_id() - << " request lock_id=" << request->lock_id() - << " request initiator=" << request->initiator() << " msg " << msg; - return; - } - bool modify_initiators = false; - auto initiators = lock_info.mutable_initiators(); - for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { - if (*iter == request->initiator()) { - initiators->erase(iter); - modify_initiators = true; - break; - } - } - if (!modify_initiators) { - LOG(INFO) << "initiators don't have initiator=" << request->initiator() - << ",initiators_size=" << lock_info.initiators_size() << ",just return"; - return; - } else if (initiators->empty()) { - LOG(INFO) << "remove delete bitmap lock, table_id=" << request->table_id() - << " lock_id=" << request->lock_id() << " key=" << hex(lock_key); - txn->remove(lock_key); + if (request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID) { + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, request->table_id(), request->initiator()}); + txn->remove(tablet_compaction_key); + LOG(INFO) << "remove tablet compaction lock, table_id=" << request->table_id() + << " lock_id=" << request->lock_id() << " initiator=" << request->initiator() + << " key=" << hex(tablet_compaction_key); } else { - lock_info.SerializeToString(&lock_val); - if (lock_val.empty()) { - LOG(WARNING) << "failed to seiralize lock_info, table_id=" << request->table_id() - << " key=" << hex(lock_key); + std::string lock_key = + meta_delete_bitmap_update_lock_key({instance_id, request->table_id(), -1}); + std::string lock_val; + DeleteBitmapUpdateLockPB lock_info; + if (!check_delete_bitmap_lock(code, msg, ss, txn, instance_id, request->table_id(), + request->lock_id(), request->initiator(), lock_key, lock_info, + ", remove lock")) { + LOG(WARNING) << "failed to check delete bitmap tablet lock" + << " table_id=" << request->table_id() + << " tablet_id=" << request->tablet_id() + << " request lock_id=" << request->lock_id() + << " request 
initiator=" << request->initiator() << " msg " << msg; return; } - LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << request->table_id() - << ", key=" << hex(lock_key) << " lock_id=" << request->lock_id() - << " initiator=" << request->initiator() - << " initiators_size=" << lock_info.initiators_size(); - txn->put(lock_key, lock_val); + bool modify_initiators = false; + auto initiators = lock_info.mutable_initiators(); + for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { + if (*iter == request->initiator()) { + initiators->erase(iter); + modify_initiators = true; + break; + } + } + if (!modify_initiators) { + LOG(INFO) << "initiators don't have initiator=" << request->initiator() + << ",initiators_size=" << lock_info.initiators_size() << ",just return"; + return; + } else if (initiators->empty()) { + LOG(INFO) << "remove delete bitmap lock, table_id=" << request->table_id() + << " lock_id=" << request->lock_id() << " key=" << hex(lock_key); + txn->remove(lock_key); + } else { + lock_info.SerializeToString(&lock_val); + if (lock_val.empty()) { + LOG(WARNING) << "failed to seiralize lock_info, table_id=" << request->table_id() + << " key=" << hex(lock_key); + return; + } + LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << request->table_id() + << ", key=" << hex(lock_key) << " lock_id=" << request->lock_id() + << " initiator=" << request->initiator() + << " initiators_size=" << lock_info.initiators_size(); + txn->put(lock_key, lock_val); + } } err = txn->commit(); if (err != TxnErrorCode::TXN_OK) { diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index 6df09bd2c20702..57f88d51dfe904 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -39,6 +39,8 @@ namespace doris::cloud { class Transaction; constexpr std::string_view BUILT_IN_STORAGE_VAULT_NAME = "built_in_storage_vault"; +static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID 
= -1; +static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; void internal_get_rowset(Transaction* txn, int64_t start, int64_t end, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code, diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 5299b85f41d9a9..4fbc74662bdce6 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ -45,9 +45,6 @@ static inline constexpr size_t get_file_name_offset(const T (&s)[S], size_t i = namespace doris::cloud { -static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; -static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; - // check compaction input_versions are valid during schema change. // If the schema change job doesnt have alter version, it dont need to check // because the schema change job is come from old version BE. @@ -454,12 +451,14 @@ static bool check_and_remove_delete_bitmap_update_lock(MetaServiceCode& code, st std::stringstream& ss, std::unique_ptr& txn, std::string& instance_id, int64_t table_id, - int64_t lock_id, int64_t lock_initiator) { + int64_t tablet_id, int64_t lock_id, + int64_t lock_initiator) { std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); std::string lock_val; TxnErrorCode err = txn->get(lock_key, &lock_val); - LOG(INFO) << "get remove delete bitmap update lock info, table_id=" << table_id - << " key=" << hex(lock_key) << " err=" << err; + LOG(INFO) << "get delete bitmap update lock info, table_id=" << table_id + << " tablet_id=" << tablet_id << " lock_id=" << lock_id + << " initiator=" << lock_initiator << " key=" << hex(lock_key) << " err=" << err; if (err != TxnErrorCode::TXN_OK) { ss << "failed to get delete bitmap update lock key, instance_id=" << instance_id << " table_id=" << table_id << " key=" << hex(lock_key) << " err=" << err; @@ -479,92 +478,149 @@ static bool 
check_and_remove_delete_bitmap_update_lock(MetaServiceCode& code, st code = MetaServiceCode::LOCK_EXPIRED; return false; } - bool found = false; - auto initiators = lock_info.mutable_initiators(); - for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { - if (*iter == lock_initiator) { - initiators->erase(iter); - found = true; - break; + if (lock_id == COMPACTION_DELETE_BITMAP_LOCK_ID) { + // when upgrade ms, prevent old ms get delete bitmap update lock + if (lock_info.initiators_size() > 0) { + ss << "compaction lock has " << lock_info.initiators_size() << " initiators"; + msg = ss.str(); + code = MetaServiceCode::LOCK_EXPIRED; + return false; } - } - if (!found) { - ss << "lock initiator " << lock_initiator << " not exist"; - msg = ss.str(); - code = MetaServiceCode::LOCK_EXPIRED; - return false; - } - if (initiators->empty()) { - INSTANCE_LOG(INFO) << "remove delete bitmap lock, table_id=" << table_id - << " lock_id=" << lock_id << " key=" << hex(lock_key); - txn->remove(lock_key); + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, table_id, lock_initiator}); + std::string tablet_compaction_val; + err = txn->get(tablet_compaction_key, &tablet_compaction_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + ss << "lock initiator " << lock_initiator << " not exist"; + msg = ss.str(); + code = MetaServiceCode::LOCK_EXPIRED; + return false; + } else if (err != TxnErrorCode::TXN_OK) { + ss << "failed to get tablet compaction key, instance_id=" << instance_id + << " table_id=" << table_id << " tablet_id=" << tablet_id + << " initiator=" << lock_initiator << " key=" << hex(tablet_compaction_key) + << " err=" << err; + msg = ss.str(); + code = cast_as(err); + return false; + } + txn->remove(tablet_compaction_key); + INSTANCE_LOG(INFO) << "remove tablet compaction lock, table_id=" << table_id + << " tablet_id=" << tablet_id << " lock_id=" << lock_id + << " initiator=" << lock_initiator + << " key=" << 
hex(tablet_compaction_key); + // may left a lock key for -1 return true; + } else { + // TODO does not check expired time + bool found = false; + auto initiators = lock_info.mutable_initiators(); + for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { + if (*iter == lock_initiator) { + initiators->erase(iter); + found = true; + break; + } + } + if (!found) { + ss << "lock initiator " << lock_initiator << " not exist"; + msg = ss.str(); + code = MetaServiceCode::LOCK_EXPIRED; + return false; + } + if (initiators->empty()) { + INSTANCE_LOG(INFO) << "remove delete bitmap lock, table_id=" << table_id + << " tablet_id=" << tablet_id << " lock_id=" << lock_id + << " key=" << hex(lock_key); + txn->remove(lock_key); + return true; + } + lock_info.SerializeToString(&lock_val); + if (lock_val.empty()) { + code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; + msg = "pb serialization error"; + return false; + } + INSTANCE_LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << table_id + << " tablet_id=" << tablet_id << ", key=" << hex(lock_key) + << " lock_id=" << lock_id << " initiator=" << lock_initiator + << " initiators_size=" << lock_info.initiators_size(); + txn->put(lock_key, lock_val); } - lock_info.SerializeToString(&lock_val); - if (lock_val.empty()) { - code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; - msg = "pb serialization error"; - return false; - } - INSTANCE_LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << table_id - << ", key=" << hex(lock_key) << " lock_id=" << lock_id - << " initiator=" << lock_initiator - << " initiators_size=" << lock_info.initiators_size(); - txn->put(lock_key, lock_val); return true; } static void remove_delete_bitmap_update_lock(std::unique_ptr& txn, const std::string& instance_id, int64_t table_id, - int64_t lock_id, int64_t lock_initiator) { - std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); - std::string lock_val; - TxnErrorCode err = 
txn->get(lock_key, &lock_val); - LOG(INFO) << "get remove delete bitmap update lock info, table_id=" << table_id - << " key=" << hex(lock_key) << " err=" << err; - if (err != TxnErrorCode::TXN_OK) { - LOG(WARNING) << "failed to get delete bitmap update lock key, instance_id=" << instance_id - << " table_id=" << table_id << " key=" << hex(lock_key) << " err=" << err; - return; - } - DeleteBitmapUpdateLockPB lock_info; - if (!lock_info.ParseFromString(lock_val)) [[unlikely]] { - LOG(WARNING) << "failed to parse DeleteBitmapUpdateLockPB, instance_id=" << instance_id - << " table_id=" << table_id << " key=" << hex(lock_key); - return; - } - if (lock_info.lock_id() != lock_id) { - return; - } - bool found = false; - auto initiators = lock_info.mutable_initiators(); - for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { - if (*iter == lock_initiator) { - initiators->erase(iter); - found = true; - break; + int64_t tablet_id, int64_t lock_id, + int64_t lock_initiator) { + if (lock_id == COMPACTION_DELETE_BITMAP_LOCK_ID) { + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, table_id, lock_initiator}); + std::string tablet_compaction_val; + TxnErrorCode err = txn->get(tablet_compaction_key, &tablet_compaction_val); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to get tablet compaction key, instance_id=" << instance_id + << " table_id=" << table_id << " initiator=" << lock_initiator + << " key=" << hex(tablet_compaction_key) << " err=" << err; + return; } + txn->remove(tablet_compaction_key); + INSTANCE_LOG(INFO) << "remove tablet compaction key, table_id=" << table_id + << ", key=" << hex(tablet_compaction_key) + << " initiator=" << lock_initiator; + } else { + std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); + std::string lock_val; + TxnErrorCode err = txn->get(lock_key, &lock_val); + LOG(INFO) << "get remove delete bitmap update lock info, table_id=" << table_id + << " 
key=" << hex(lock_key) << " err=" << err; + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to get delete bitmap update lock key, instance_id=" + << instance_id << " table_id=" << table_id << " key=" << hex(lock_key) + << " err=" << err; + return; + } + DeleteBitmapUpdateLockPB lock_info; + if (!lock_info.ParseFromString(lock_val)) [[unlikely]] { + LOG(WARNING) << "failed to parse DeleteBitmapUpdateLockPB, instance_id=" << instance_id + << " table_id=" << table_id << " key=" << hex(lock_key); + return; + } + if (lock_info.lock_id() != lock_id) { + return; + } + bool found = false; + auto initiators = lock_info.mutable_initiators(); + for (auto iter = initiators->begin(); iter != initiators->end(); iter++) { + if (*iter == lock_initiator) { + initiators->erase(iter); + found = true; + break; + } + } + if (!found) { + return; + } + if (initiators->empty()) { + INSTANCE_LOG(INFO) << "remove delete bitmap lock, table_id=" << table_id + << " tablet_id=" << tablet_id << " lock_id=" << lock_id + << " initiator=" << lock_initiator << " key=" << hex(lock_key); + txn->remove(lock_key); + return; + } + lock_info.SerializeToString(&lock_val); + if (lock_val.empty()) { + INSTANCE_LOG(WARNING) << "failed to seiralize lock_info, table_id=" << table_id + << " key=" << hex(lock_key); + return; + } + INSTANCE_LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << table_id + << " tablet_id=" << tablet_id << " key=" << hex(lock_key) + << " lock_id=" << lock_id << " initiator=" << lock_initiator + << " initiators_size=" << lock_info.initiators_size(); + txn->put(lock_key, lock_val); } - if (!found) { - return; - } - if (initiators->empty()) { - INSTANCE_LOG(INFO) << "remove delete bitmap lock, table_id=" << table_id - << " lock_id=" << lock_id << " key=" << hex(lock_key); - txn->remove(lock_key); - return; - } - lock_info.SerializeToString(&lock_val); - if (lock_val.empty()) { - INSTANCE_LOG(WARNING) << "failed to seiralize lock_info, table_id=" << table_id - << 
" key=" << hex(lock_key); - return; - } - INSTANCE_LOG(INFO) << "remove delete bitmap lock initiator, table_id=" << table_id - << ", key=" << hex(lock_key) << " lock_id=" << lock_id - << " initiator=" << lock_initiator - << " initiators_size=" << lock_info.initiators_size(); - txn->put(lock_key, lock_val); } void process_compaction_job(MetaServiceCode& code, std::string& msg, std::stringstream& ss, @@ -642,7 +698,7 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string INSTANCE_LOG(INFO) << "abort tablet compaction job, tablet_id=" << tablet_id << " key=" << hex(job_key); if (compaction.has_delete_bitmap_lock_initiator()) { - remove_delete_bitmap_update_lock(txn, instance_id, table_id, + remove_delete_bitmap_update_lock(txn, instance_id, table_id, tablet_id, COMPACTION_DELETE_BITMAP_LOCK_ID, compaction.delete_bitmap_lock_initiator()); } @@ -795,8 +851,8 @@ void process_compaction_job(MetaServiceCode& code, std::string& msg, std::string // remove delete bitmap update lock for MoW table if (compaction.has_delete_bitmap_lock_initiator()) { bool success = check_and_remove_delete_bitmap_update_lock( - code, msg, ss, txn, instance_id, table_id, COMPACTION_DELETE_BITMAP_LOCK_ID, - compaction.delete_bitmap_lock_initiator()); + code, msg, ss, txn, instance_id, table_id, tablet_id, + COMPACTION_DELETE_BITMAP_LOCK_ID, compaction.delete_bitmap_lock_initiator()); if (!success) { return; } @@ -1251,8 +1307,8 @@ void process_schema_change_job(MetaServiceCode& code, std::string& msg, std::str // process mow table, check lock if (new_tablet_meta.enable_unique_key_merge_on_write()) { bool success = check_and_remove_delete_bitmap_update_lock( - code, msg, ss, txn, instance_id, new_table_id, SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID, - schema_change.delete_bitmap_lock_initiator()); + code, msg, ss, txn, instance_id, new_table_id, new_tablet_id, + SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID, schema_change.delete_bitmap_lock_initiator()); if (!success) { return; } diff 
--git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index 40c328d8945f39..581ee4067d4c17 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -1212,6 +1212,19 @@ int InstanceRecycler::recycle_versions() { auto tbl_version_key = table_version_key({instance_id_, db_id, table_id}); txn->remove(tbl_version_key); LOG(WARNING) << "remove table version kv " << hex(tbl_version_key); + // 3. Remove mow delete bitmap update lock and tablet compaction lock + std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1}); + txn->remove(lock_key); + LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key); + std::string tablet_compaction_key_begin = + mow_tablet_compaction_key({instance_id_, table_id, 0}); + std::string tablet_compaction_key_end = + mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX}); + txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); + LOG(WARNING) << "remove mow tablet compaction kv, begin=" + << hex(tablet_compaction_key_begin) + << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id + << " table_id=" << table_id; err = txn->commit(); if (err != TxnErrorCode::TXN_OK) { return -1; diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp index 998a24256f43c0..934ced09638abb 100644 --- a/cloud/test/meta_service_job_test.cpp +++ b/cloud/test/meta_service_job_test.cpp @@ -152,13 +152,13 @@ void insert_rowsets(TxnKv* txn_kv, int64_t table_id, int64_t index_id, int64_t p } MetaServiceCode get_delete_bitmap_lock(MetaServiceProxy* meta_service, int64_t table_id, - int64_t lock_id, int64_t initor) { + int64_t lock_id, int64_t initor, int64_t expiration = 5) { brpc::Controller cntl; GetDeleteBitmapUpdateLockRequest req; GetDeleteBitmapUpdateLockResponse res; req.set_cloud_unique_id("test_cloud_unique_id"); req.set_table_id(table_id); - req.set_expiration(5); + req.set_expiration(expiration); 
req.set_lock_id(lock_id); req.set_initiator(initor); meta_service->get_delete_bitmap_update_lock( @@ -166,11 +166,29 @@ MetaServiceCode get_delete_bitmap_lock(MetaServiceProxy* meta_service, int64_t t return res.status().code(); } +MetaServiceCode remove_delete_bitmap_lock(MetaServiceProxy* meta_service, int64_t table_id, + int64_t lock_id, int64_t initiator) { + brpc::Controller cntl; + RemoveDeleteBitmapUpdateLockRequest req; + RemoveDeleteBitmapUpdateLockResponse res; + req.set_cloud_unique_id("test_cloud_unique_id"); + req.set_table_id(table_id); + req.set_lock_id(lock_id); + req.set_initiator(initiator); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + return res.status().code(); +} + void remove_delete_bitmap_lock(MetaServiceProxy* meta_service, int64_t table_id) { std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); std::unique_ptr txn; ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); txn->remove(lock_key); + std::string tablet_compaction_key_begin = mow_tablet_compaction_key({instance_id, table_id, 0}); + std::string tablet_compaction_key_end = + mow_tablet_compaction_key({instance_id, table_id, INT64_MAX}); + txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK); } @@ -1287,6 +1305,8 @@ TEST(MetaServiceJobTest, CompactionJobWithMoWTest) { ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK); }; + remove_delete_bitmap_lock(meta_service.get(), 1); + remove_delete_bitmap_lock(meta_service.get(), 2); test_start_compaction_job(1, 2, 3, 4, TabletCompactionJobPB::CUMULATIVE); test_commit_compaction_job(1, 2, 3, 4, TabletCompactionJobPB::CUMULATIVE); ASSERT_EQ(res.status().code(), MetaServiceCode::KV_TXN_GET_ERR); @@ -1296,21 +1316,22 @@ TEST(MetaServiceJobTest, CompactionJobWithMoWTest) { ASSERT_EQ(res_code, MetaServiceCode::OK); test_commit_compaction_job(1, 2, 
3, 4, TabletCompactionJobPB::CUMULATIVE); ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED); - remove_delete_bitmap_lock(meta_service.get(), 1); + res_code = remove_delete_bitmap_lock(meta_service.get(), 1, 1, 1); + ASSERT_EQ(res_code, MetaServiceCode::OK); clear_rowsets(4); res_code = get_delete_bitmap_lock(meta_service.get(), 1, -1, 1); ASSERT_EQ(res_code, MetaServiceCode::OK); test_commit_compaction_job(1, 2, 3, 4, TabletCompactionJobPB::CUMULATIVE); ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED); - remove_delete_bitmap_lock(meta_service.get(), 1); + res_code = remove_delete_bitmap_lock(meta_service.get(), 1, -1, 1); + ASSERT_EQ(res_code, MetaServiceCode::OK); clear_rowsets(4); res_code = get_delete_bitmap_lock(meta_service.get(), 1, -1, 12345); ASSERT_EQ(res_code, MetaServiceCode::OK); test_commit_compaction_job(1, 2, 3, 4, TabletCompactionJobPB::CUMULATIVE); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - remove_delete_bitmap_lock(meta_service.get(), 1); clear_rowsets(4); test_start_compaction_job(2, 2, 3, 5, TabletCompactionJobPB::BASE); @@ -1320,7 +1341,8 @@ TEST(MetaServiceJobTest, CompactionJobWithMoWTest) { ASSERT_EQ(res_code, MetaServiceCode::OK); test_commit_compaction_job(2, 2, 3, 5, TabletCompactionJobPB::BASE); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - remove_delete_bitmap_lock(meta_service.get(), 2); + res_code = remove_delete_bitmap_lock(meta_service.get(), 2, -1, 2345); + ASSERT_EQ(res_code, MetaServiceCode::OK); clear_rowsets(5); test_start_compaction_job(2, 2, 3, 6, TabletCompactionJobPB::BASE); @@ -1332,8 +1354,20 @@ TEST(MetaServiceJobTest, CompactionJobWithMoWTest) { ASSERT_EQ(res.status().code(), MetaServiceCode::OK); res_code = get_delete_bitmap_lock(meta_service.get(), 2, 123, -1); ASSERT_EQ(res_code, MetaServiceCode::OK); - remove_delete_bitmap_lock(meta_service.get(), 2); + res_code = remove_delete_bitmap_lock(meta_service.get(), 2, 123, -1); + ASSERT_EQ(res_code, MetaServiceCode::OK); 
clear_rowsets(6); + + // commit compaction job with lock expired + test_start_compaction_job(2, 2, 3, 5, TabletCompactionJobPB::BASE); + res_code = get_delete_bitmap_lock(meta_service.get(), 2, -1, 12345, 1); + ASSERT_EQ(res_code, MetaServiceCode::OK); + sleep(2); + test_commit_compaction_job(2, 2, 3, 5, TabletCompactionJobPB::BASE); + ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED); + res_code = remove_delete_bitmap_lock(meta_service.get(), 2, -1, 12345); + ASSERT_EQ(res_code, MetaServiceCode::OK); + clear_rowsets(5); } TEST(MetaServiceJobTest, SchemaChangeJobTest) { @@ -1747,7 +1781,7 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { int64_t tablet_id = 4; ASSERT_NO_FATAL_FAILURE( create_tablet(meta_service.get(), table_id, index_id, partition_id, tablet_id, true)); - + remove_delete_bitmap_lock(meta_service.get(), table_id); { int64_t new_tablet_id = 14; ASSERT_NO_FATAL_FAILURE(create_tablet(meta_service.get(), table_id, index_id, partition_id, @@ -1775,7 +1809,8 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { output_rowsets, res); ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED); ASSERT_NE(res.status().msg().find("lock id not match"), std::string::npos); - remove_delete_bitmap_lock(meta_service.get(), table_id); + res_code = remove_delete_bitmap_lock(meta_service.get(), table_id, -1, 2345); + ASSERT_EQ(res_code, MetaServiceCode::OK); res.Clear(); res_code = get_delete_bitmap_lock(meta_service.get(), table_id, -2, 2345); @@ -1784,7 +1819,8 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { output_rowsets, res); ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_EXPIRED); ASSERT_NE(res.status().msg().find("lock initiator 12345 not exist"), std::string::npos); - remove_delete_bitmap_lock(meta_service.get(), table_id); + res_code = remove_delete_bitmap_lock(meta_service.get(), table_id, -2, 2345); + ASSERT_EQ(res_code, MetaServiceCode::OK); res.Clear(); res_code = get_delete_bitmap_lock(meta_service.get(), 
table_id, -2, 12345); @@ -1792,7 +1828,8 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job1", "be1", output_rowsets, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - remove_delete_bitmap_lock(meta_service.get(), table_id); + res_code = remove_delete_bitmap_lock(meta_service.get(), table_id, -2, 12345); + ASSERT_EQ(res_code, MetaServiceCode::LOCK_EXPIRED); res.Clear(); } @@ -1819,7 +1856,10 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { finish_schema_change_job(meta_service.get(), tablet_id, new_tablet_id, "job2", "be1", output_rowsets, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); - remove_delete_bitmap_lock(meta_service.get(), table_id); + res_code = remove_delete_bitmap_lock(meta_service.get(), table_id, -2, 12345); + ASSERT_EQ(res_code, MetaServiceCode::LOCK_EXPIRED); + res_code = remove_delete_bitmap_lock(meta_service.get(), table_id, -2, 12346); + ASSERT_EQ(res_code, MetaServiceCode::OK); res.Clear(); } } diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index 10a5b3c6f18556..6c22b002db3ef5 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -4606,6 +4606,230 @@ TEST(MetaServiceTest, GetTabletStatsTest) { EXPECT_EQ(res.tablet_stats(0).segment_size(), 40000); } +void remove_delete_bitmap_lock(MetaServiceProxy* meta_service, int64_t table_id) { + std::string lock_key = + meta_delete_bitmap_update_lock_key({"test_cloud_unique_id", table_id, -1}); + std::unique_ptr txn; + ASSERT_EQ(meta_service->txn_kv()->create_txn(&txn), TxnErrorCode::TXN_OK); + txn->remove(lock_key); + std::string tablet_compaction_key_begin = + mow_tablet_compaction_key({"test_cloud_unique_id", table_id, 0}); + std::string tablet_compaction_key_end = + mow_tablet_compaction_key({"test_cloud_unique_id", table_id, INT64_MAX}); + txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end); + 
ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK); +} + +TEST(MetaServiceTest, GetDeleteBitmapUpdateLock) { + auto meta_service = get_meta_service(); + [[maybe_unused]] auto sp = SyncPoint::get_instance(); + std::unique_ptr> defer( + (int*)0x01, [](int*) { SyncPoint::get_instance()->clear_all_call_backs(); }); + remove_delete_bitmap_lock(meta_service.get(), 1); + remove_delete_bitmap_lock(meta_service.get(), 2); + int64_t table_id = 9; + + // case 1: lock key does not exist, get and remove load lock + brpc::Controller cntl; + GetDeleteBitmapUpdateLockRequest req; + GetDeleteBitmapUpdateLockResponse res; + req.set_cloud_unique_id("test_cloud_unique_id"); + req.set_table_id(table_id); + req.add_partition_ids(123); + req.set_expiration(5); + req.set_lock_id(888); + req.set_initiator(-1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + RemoveDeleteBitmapUpdateLockRequest remove_req; + RemoveDeleteBitmapUpdateLockResponse remove_res; + remove_req.set_cloud_unique_id("test_cloud_unique_id"); + remove_req.set_table_id(table_id); + remove_req.set_lock_id(888); + remove_req.set_initiator(-1); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 2: lock key does not exist, get and remove compaction lock + req.add_partition_ids(123); + req.set_expiration(600); + req.set_lock_id(-1); + req.set_initiator(100); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + remove_req.set_tablet_id(2); + remove_req.set_lock_id(-1); + remove_req.set_initiator(100); + meta_service->remove_delete_bitmap_update_lock( + 
reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 3: lock key owned by load1, load2 get lock + req.add_partition_ids(123); + req.set_expiration(600); + req.set_lock_id(888); + req.set_initiator(-1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + req.set_lock_id(889); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_CONFLICT); + + // case 4: lock key owned by load1, compaction1 get lock + req.set_lock_id(-1); + req.set_initiator(100); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_CONFLICT); + + remove_req.set_tablet_id(2); + remove_req.set_lock_id(888); + remove_req.set_initiator(-1); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 5: lock key owned by load1 but expired, load2 get lock + req.add_partition_ids(123); + req.set_expiration(1); + req.set_lock_id(888); + req.set_initiator(-1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + sleep(2); + req.set_lock_id(889); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + remove_req.set_lock_id(889); + remove_req.set_initiator(-1); + 
meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 6: lock key owned by load1 but expired, compaction1 get lock + req.add_partition_ids(123); + req.set_expiration(1); + req.set_lock_id(888); + req.set_initiator(-1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + sleep(2); + req.set_lock_id(-1); + req.set_initiator(888); + req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + remove_req.set_lock_id(-1); + remove_req.set_initiator(888); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 7: lock key owned by compaction, new compaction get lock + req.set_lock_id(-1); + req.set_initiator(100); + req.set_expiration(100); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + req.set_lock_id(-1); + req.set_initiator(101); + req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + // new compaction get lock again + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + // case 8: lock key owned by compaction, load1 
get lock + req.set_lock_id(888); + req.set_initiator(-1); + req.set_expiration(60); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::LOCK_CONFLICT); + + remove_req.set_lock_id(-1); + remove_req.set_initiator(100); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 9: lock key owned by compaction but all expired (101 900), load1 get lock + req.set_table_id(table_id); + req.set_lock_id(-1); + req.set_initiator(900); + req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + sleep(2); + req.set_table_id(table_id); + req.set_lock_id(888); + req.set_initiator(-1); + req.set_expiration(60); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + + remove_req.set_lock_id(888); + remove_req.set_initiator(-1); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_req, &remove_res, + nullptr); + ASSERT_EQ(remove_res.status().code(), MetaServiceCode::OK); + + // case 10: lock key does not exist, compaction get lock but txn commit conflict, do fast retry + sp->set_call_back("get_delete_bitmap_update_lock:commit:conflict", [&](auto&& args) { + auto* first_retry = try_any_cast(args[0]); + if (*first_retry) { + *try_any_cast(args[1]) = TxnErrorCode::TXN_CONFLICT; + } else { + *try_any_cast(args[1]) = TxnErrorCode::TXN_OK; + } + }); + sp->enable_processing(); + req.set_lock_id(-1); + req.set_initiator(100); + 
req.set_expiration(10); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &req, &res, nullptr); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + sp->clear_all_call_backs(); + sp->clear_trace(); + sp->disable_processing(); + + remove_delete_bitmap_lock(meta_service.get(), 1); + remove_delete_bitmap_lock(meta_service.get(), 2); +} + TEST(MetaServiceTest, GetDeleteBitmapUpdateLockNoReadStats) { auto meta_service = get_meta_service(); @@ -4925,6 +5149,7 @@ TEST(MetaServiceTest, UpdateDeleteBitmapWithBigKeys) { TEST(MetaServiceTest, UpdateDeleteBitmap) { auto meta_service = get_meta_service(); + remove_delete_bitmap_lock(meta_service.get(), 112); // get delete bitmap update lock brpc::Controller cntl; @@ -5223,6 +5448,96 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) { ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value); } + RemoveDeleteBitmapUpdateLockRequest remove_lock_req; + RemoveDeleteBitmapUpdateLockResponse remove_lock_res; + remove_lock_req.set_cloud_unique_id("test_cloud_unique_id"); + remove_lock_req.set_table_id(112); + remove_lock_req.set_lock_id(888); + remove_lock_req.set_initiator(-1); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_lock_req, + &remove_lock_res, nullptr); + ASSERT_EQ(remove_lock_res.status().code(), MetaServiceCode::OK); + + { + // case: compaction update delete bitmap + get_lock_req.set_lock_id(-1); + get_lock_req.set_initiator(800); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req, + &get_lock_res, nullptr); + ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK); + // update delete bitmap + UpdateDeleteBitmapRequest update_delete_bitmap_req; + UpdateDeleteBitmapResponse update_delete_bitmap_res; + update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id"); + 
update_delete_bitmap_req.set_table_id(112); + update_delete_bitmap_req.set_partition_id(123); + update_delete_bitmap_req.set_lock_id(-1); + update_delete_bitmap_req.set_initiator(800); + update_delete_bitmap_req.set_tablet_id(333); + update_delete_bitmap_req.add_rowset_ids("123"); + update_delete_bitmap_req.add_segment_ids(0); + update_delete_bitmap_req.add_versions(2); + update_delete_bitmap_req.add_segment_delete_bitmaps("compaction0"); + meta_service->update_delete_bitmap( + reinterpret_cast(&cntl), + &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); + ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK); + // remove lock + remove_lock_req.set_lock_id(-1); + remove_lock_req.set_initiator(800); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_lock_req, + &remove_lock_res, nullptr); + ASSERT_EQ(remove_lock_res.status().code(), MetaServiceCode::OK); + + // case: compaction update delete bitmap with lock expired + get_lock_req.set_lock_id(-1); + get_lock_req.set_initiator(800); + get_lock_req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req, + &get_lock_res, nullptr); + ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK); + // load get lock + sleep(2); + get_lock_req.set_lock_id(100); + get_lock_req.set_initiator(-1); + get_lock_req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req, + &get_lock_res, nullptr); + ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK); + // compaction update delete bitmap + meta_service->update_delete_bitmap( + reinterpret_cast(&cntl), + &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); + ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::LOCK_EXPIRED); + + // case: compaction2 get lock + sleep(2); + 
get_lock_req.set_lock_id(-1); + get_lock_req.set_initiator(810); + get_lock_req.set_expiration(1); + meta_service->get_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &get_lock_req, + &get_lock_res, nullptr); + ASSERT_EQ(get_lock_res.status().code(), MetaServiceCode::OK); + // compaction1 update delete bitmap + meta_service->update_delete_bitmap( + reinterpret_cast(&cntl), + &update_delete_bitmap_req, &update_delete_bitmap_res, nullptr); + ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::LOCK_EXPIRED); + // remove compaction2 lock + remove_lock_req.set_lock_id(-1); + remove_lock_req.set_initiator(810); + meta_service->remove_delete_bitmap_update_lock( + reinterpret_cast<::google::protobuf::RpcController*>(&cntl), &remove_lock_req, + &remove_lock_res, nullptr); + ASSERT_EQ(remove_lock_res.status().code(), MetaServiceCode::OK); + } + { //compaction update delete bitmap without lock UpdateDeleteBitmapRequest update_delete_bitmap_req; @@ -5266,6 +5581,8 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) { ASSERT_EQ(get_delete_bitmap_res.versions(0), 2); ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value); } + + remove_delete_bitmap_lock(meta_service.get(), 112); } TEST(MetaServiceTest, UpdateDeleteBitmapWithException) { diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index 8efa3378461b91..5b9203b1d4ef79 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -408,6 +408,30 @@ static int create_partition_version_kv(TxnKv* txn_kv, int64_t table_id, int64_t return 0; } +static int create_delete_bitmap_update_lock_kv(TxnKv* txn_kv, int64_t table_id, int64_t lock_id, + int64_t initiator, int64_t expiration) { + auto key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); + DeleteBitmapUpdateLockPB lock_info; + lock_info.set_lock_id(lock_id); + auto val = lock_info.SerializeAsString(); + std::unique_ptr txn; + if 
(txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(key, val); + std::string tablet_compaction_key = + mow_tablet_compaction_key({instance_id, table_id, initiator}); + std::string tablet_compaction_val; + MowTabletCompactionPB mow_tablet_compaction; + mow_tablet_compaction.set_expiration(expiration); + mow_tablet_compaction.SerializeToString(&tablet_compaction_val); + txn->put(tablet_compaction_key, tablet_compaction_val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + return 0; +} + static int create_table_version_kv(TxnKv* txn_kv, int64_t table_id) { auto key = table_version_key({instance_id, db_id, table_id}); std::string val(sizeof(int64_t), 0); @@ -1333,6 +1357,9 @@ TEST(RecyclerTest, recycle_versions) { for (int i = 0; i < 5; ++i) { create_recycle_partiton(txn_kv.get(), table_id, partition_ids[i], index_ids); } + // create delete bitmap update lock kv + create_delete_bitmap_update_lock_kv(txn_kv.get(), table_id, -1, 100, 60); + create_delete_bitmap_update_lock_kv(txn_kv.get(), table_id, -1, 110, 60); InstanceInfoPB instance; instance.set_instance_id(instance_id); @@ -1359,6 +1386,17 @@ TEST(RecyclerTest, recycle_versions) { ASSERT_EQ(iter->size(), 1); auto [tk, tv] = iter->next(); EXPECT_EQ(tk, table_version_key({instance_id, db_id, 10000})); + // delete bitmap update lock must not be deleted + auto delete_bitmap_update_lock_key = + meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); + std::string delete_bitmap_update_lock_val; + ASSERT_EQ(txn->get(delete_bitmap_update_lock_key, &delete_bitmap_update_lock_val), + TxnErrorCode::TXN_OK); + auto tablet_compaction_key0 = mow_tablet_compaction_key({instance_id, table_id, 0}); + auto tablet_compaction_key1 = mow_tablet_compaction_key({instance_id, table_id + 1, 0}); + ASSERT_EQ(txn->get(tablet_compaction_key0, tablet_compaction_key1, &iter), + TxnErrorCode::TXN_OK); + ASSERT_EQ(iter->size(), 2); // Drop indexes for (auto index_id : index_ids) { @@ -1373,6 
+1411,12 @@ TEST(RecyclerTest, recycle_versions) { ASSERT_EQ(iter->size(), 0); ASSERT_EQ(txn->get(table_key_begin, table_key_end, &iter), TxnErrorCode::TXN_OK); ASSERT_EQ(iter->size(), 0); + // delete bitmap update lock must be deleted + ASSERT_EQ(txn->get(delete_bitmap_update_lock_key, &delete_bitmap_update_lock_val), + TxnErrorCode::TXN_KEY_NOT_FOUND); + ASSERT_EQ(txn->get(tablet_compaction_key0, tablet_compaction_key1, &iter), + TxnErrorCode::TXN_OK); + ASSERT_EQ(iter->size(), 0); } TEST(RecyclerTest, advance_pending_txn) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java index b6a0e702d918d4..7bb3c6771994c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java @@ -971,9 +971,11 @@ private void getDeleteBitmapUpdateLock(long transactionId, List mowTa || response.getStatus().getCode() == MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES) { // DELETE_BITMAP_LOCK_ERR will be retried on be throw new UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR, - "Failed to get delete bitmap lock due to confilct"); + "Failed to get delete bitmap lock due to conflict"); } - throw new UserException("Failed to get delete bitmap lock, code: " + response.getStatus().getCode()); + throw new UserException( + "Failed to get delete bitmap lock, msg: " + response.getStatus().getMsg() + ", code: " + + response.getStatus().getCode()); } // record tablet's latest compaction stats from meta service and send them to BEs diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 230077e2eda83b..5450b367be8bde 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1468,6 +1468,10 @@ message DeleteBitmapUpdateLockPB { repeated int64 initiators = 3; } +message 
MowTabletCompactionPB { + optional int64 expiration = 1; +} + message GetDeleteBitmapUpdateLockRequest { optional string cloud_unique_id = 1; // For auth optional int64 table_id = 2;