From 121d53a7168542f3cd2dd0dbfeb598dfcd757833 Mon Sep 17 00:00:00 2001 From: bright-starry-sky Date: Mon, 10 Aug 2020 20:50:11 +0800 Subject: [PATCH 1/3] fix error for checkpoint --- src/meta/ActiveHostsMan.cpp | 22 ++++++++++++++++++++++ src/meta/ActiveHostsMan.h | 2 ++ src/meta/processors/admin/SnapShot.cpp | 23 +++++++++++++++++++---- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/meta/ActiveHostsMan.cpp b/src/meta/ActiveHostsMan.cpp index 722505e1b12..9137484f268 100644 --- a/src/meta/ActiveHostsMan.cpp +++ b/src/meta/ActiveHostsMan.cpp @@ -92,5 +92,27 @@ int64_t LastUpdateTimeMan::get(kvstore::KVStore* kv) { return 0; } +bool ActiveHostsMan::spaceExistInHost(kvstore::KVStore* kv, + GraphSpaceID space, + const HostAddr& host) { + folly::SharedMutex::ReadHolder rHolder(LockUtils::spaceLock()); + auto prefix = MetaServiceUtils::partPrefix(space); + std::unique_ptr iter; + auto kvRet = kv->prefix(kDefaultSpaceId, kDefaultPartId, prefix, &iter); + if (kvRet != kvstore::ResultCode::SUCCEEDED) { + return false; + } + while (iter->valid()) { + auto hosts = MetaServiceUtils::parsePartVal(iter->val()); + for (auto& ph : hosts) { + if (host == HostAddr(ph.get_ip(), ph.get_port())) { + return true; + } + } + iter->next(); + } + return false; +} + } // namespace meta } // namespace nebula diff --git a/src/meta/ActiveHostsMan.h b/src/meta/ActiveHostsMan.h index 43d5e1decc5..43913e76892 100644 --- a/src/meta/ActiveHostsMan.h +++ b/src/meta/ActiveHostsMan.h @@ -57,6 +57,8 @@ class ActiveHostsMan final { static bool isLived(kvstore::KVStore* kv, const HostAddr& host); + static bool spaceExistInHost(kvstore::KVStore* kv, GraphSpaceID space, const HostAddr& host); + protected: ActiveHostsMan() = default; }; diff --git a/src/meta/processors/admin/SnapShot.cpp b/src/meta/processors/admin/SnapShot.cpp index 5032d42e4ef..dbe8d0ae165 100644 --- a/src/meta/processors/admin/SnapShot.cpp +++ b/src/meta/processors/admin/SnapShot.cpp @@ -24,6 +24,11 @@ cpp2::ErrorCode Snapshot::createSnapshot(const std::string& name) { auto hosts = ActiveHostsMan::getActiveHosts(kv_); for (const auto& host : hosts) { for (auto& space : spaces) { + // If the storage node doesn't have this space. Skip this operation. + auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); + if (!spaceExist) { + continue; + } auto status = client_->createSnapshot(space, name, host).get(); if (!status.ok()) { return cpp2::ErrorCode::E_RPC_FAILURE; @@ -46,7 +51,12 @@ cpp2::ErrorCode Snapshot::dropSnapshot(const std::string& name, auto activeHosts = ActiveHostsMan::getActiveHosts(kv_); for (auto& host : hosts) { if (std::find(activeHosts.begin(), activeHosts.end(), host) != activeHosts.end()) { - std::for_each(spaces.begin(), spaces.end(), [name, host, this](auto& space) { + for (const auto& space : spaces) { + // If the storage node doesn't have this space. Skip this operation. + auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); + if (!spaceExist) { + continue; + } auto status = client_->dropSnapshot(space, name, host).get(); if (!status.ok()) { auto msg = "failed drop checkpoint : \"%s\". on host %s. error %s"; @@ -56,7 +66,7 @@ cpp2::ErrorCode Snapshot::dropSnapshot(const std::string& name, status.toString().c_str()); LOG(ERROR) << error; } - }); + } } } return cpp2::ErrorCode::SUCCEEDED; @@ -90,13 +100,18 @@ cpp2::ErrorCode Snapshot::blockingWrites(storage::cpp2::EngineSignType sign) { } auto hosts = ActiveHostsMan::getActiveHosts(kv_); for (const auto& host : hosts) { - std::for_each(spaces.begin(), spaces.end(), [host, sign, this](auto& space) { + for (const auto& space : spaces) { + // If the storage node doesn't have this space. Skip this operation. + auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); + if (!spaceExist) { + continue; + } auto status = client_->blockingWrites(space, sign, host).get(); if (!status.ok()) { LOG(ERROR) << " Send blocking sign error on host : " << network::NetworkUtils::toHosts({host}); } - }); + } } return cpp2::ErrorCode::SUCCEEDED; } From 1bff2ad8e130d4a7266fdd29efa7d38ddd0d20c5 Mon Sep 17 00:00:00 2001 From: bright-starry-sky Date: Thu, 13 Aug 2020 08:56:57 +0800 Subject: [PATCH 2/3] fix error for checkpoint --- src/meta/ActiveHostsMan.cpp | 22 ---------- src/meta/ActiveHostsMan.h | 2 - src/meta/processors/admin/SnapShot.cpp | 56 ++++++++++++++------------ src/meta/processors/admin/SnapShot.h | 5 +-- 4 files changed, 32 insertions(+), 53 deletions(-) diff --git a/src/meta/ActiveHostsMan.cpp b/src/meta/ActiveHostsMan.cpp index 9137484f268..722505e1b12 100644 --- a/src/meta/ActiveHostsMan.cpp +++ b/src/meta/ActiveHostsMan.cpp @@ -92,27 +92,5 @@ int64_t LastUpdateTimeMan::get(kvstore::KVStore* kv) { return 0; } -bool ActiveHostsMan::spaceExistInHost(kvstore::KVStore* kv, - GraphSpaceID space, - const HostAddr& host) { - folly::SharedMutex::ReadHolder rHolder(LockUtils::spaceLock()); - auto prefix = MetaServiceUtils::partPrefix(space); - std::unique_ptr iter; - auto kvRet = kv->prefix(kDefaultSpaceId, kDefaultPartId, prefix, &iter); - if (kvRet != kvstore::ResultCode::SUCCEEDED) { - return false; - } - while (iter->valid()) { - auto hosts = MetaServiceUtils::parsePartVal(iter->val()); - for (auto& ph : hosts) { - if (host == HostAddr(ph.get_ip(), ph.get_port())) { - return true; - } - } - iter->next(); - } - return false; -} - } // namespace meta } // namespace nebula diff --git a/src/meta/ActiveHostsMan.h b/src/meta/ActiveHostsMan.h index 43913e76892..43d5e1decc5 100644 --- a/src/meta/ActiveHostsMan.h +++ b/src/meta/ActiveHostsMan.h @@ -57,8 +57,6 @@ class ActiveHostsMan final { static bool isLived(kvstore::KVStore* kv, const HostAddr& host); - static bool spaceExistInHost(kvstore::KVStore* kv, GraphSpaceID space, const HostAddr& host); - protected: ActiveHostsMan() = default; }; diff --git a/src/meta/processors/admin/SnapShot.cpp b/src/meta/processors/admin/SnapShot.cpp index dbe8d0ae165..9d3a092dd1f 100644 --- a/src/meta/processors/admin/SnapShot.cpp +++ b/src/meta/processors/admin/SnapShot.cpp @@ -21,14 +21,9 @@ cpp2::ErrorCode Snapshot::createSnapshot(const std::string& name) { << static_cast(ret); return cpp2::ErrorCode::E_STORE_FAILURE; } - auto hosts = ActiveHostsMan::getActiveHosts(kv_); - for (const auto& host : hosts) { - for (auto& space : spaces) { - // If the storage node doesn't have this space. Skip this operation. - auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); - if (!spaceExist) { - continue; - } + for (auto& space : spaces) { + auto hostsBySpace = getHostsBySpace(space); + for (const auto& host : hostsBySpace) { auto status = client_->createSnapshot(space, name, host).get(); if (!status.ok()) { return cpp2::ErrorCode::E_RPC_FAILURE; @@ -47,16 +42,11 @@ cpp2::ErrorCode Snapshot::dropSnapshot(const std::string& name, << static_cast(ret); return cpp2::ErrorCode::E_STORE_FAILURE; } - // The drop checkpoint will be skip if original host has been lost. - auto activeHosts = ActiveHostsMan::getActiveHosts(kv_); - for (auto& host : hosts) { - if (std::find(activeHosts.begin(), activeHosts.end(), host) != activeHosts.end()) { - for (const auto& space : spaces) { - // If the storage node doesn't have this space. Skip this operation. - auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); - if (!spaceExist) { - continue; - } + + for (const auto& space : spaces) { + auto hostsBySpace = getHostsBySpace(space); + for (const auto& host : hostsBySpace) { + if (std::find(hosts.begin(), hosts.end(), host) != hosts.end()) { auto status = client_->dropSnapshot(space, name, host).get(); if (!status.ok()) { auto msg = "failed drop checkpoint : \"%s\". on host %s. error %s"; @@ -98,14 +88,9 @@ cpp2::ErrorCode Snapshot::blockingWrites(storage::cpp2::EngineSignType sign) { << static_cast(ret); return cpp2::ErrorCode::E_STORE_FAILURE; } - auto hosts = ActiveHostsMan::getActiveHosts(kv_); - for (const auto& host : hosts) { - for (const auto& space : spaces) { - // If the storage node doesn't have this space. Skip this operation. - auto spaceExist = ActiveHostsMan::spaceExistInHost(kv_, space, host); - if (!spaceExist) { - continue; - } + for (auto& space : spaces) { + auto hostsBySpace = getHostsBySpace(space); + for (const auto& host : hostsBySpace) { auto status = client_->blockingWrites(space, sign, host).get(); if (!status.ok()) { LOG(ERROR) << " Send blocking sign error on host : " @@ -116,6 +101,25 @@ cpp2::ErrorCode Snapshot::blockingWrites(storage::cpp2::EngineSignType sign) { return cpp2::ErrorCode::SUCCEEDED; } +std::set Snapshot::getHostsBySpace(GraphSpaceID space) { + folly::SharedMutex::ReadHolder rHolder(LockUtils::spaceLock()); + std::set hosts; + auto prefix = MetaServiceUtils::partPrefix(space); + std::unique_ptr iter; + auto kvRet = kv_->prefix(kDefaultSpaceId, kDefaultPartId, prefix, &iter); + if (kvRet != kvstore::ResultCode::SUCCEEDED) { + return {}; + } + while (iter->valid()) { + auto partHosts = MetaServiceUtils::parsePartVal(iter->val()); + for (auto& ph : partHosts) { + hosts.emplace(HostAddr(ph.get_ip(), ph.get_port())); + } + iter->next(); + } + return hosts; +} + } // namespace meta } // namespace nebula diff --git a/src/meta/processors/admin/SnapShot.h b/src/meta/processors/admin/SnapShot.h index a3a31f4968a..0f6c399d33d 100644 --- a/src/meta/processors/admin/SnapShot.h +++ b/src/meta/processors/admin/SnapShot.h @@ -32,9 +32,6 @@ class Snapshot { cpp2::ErrorCode blockingWrites(storage::cpp2::EngineSignType sign); - std::unordered_map> - getLeaderParts(HostLeaderMap *hostLeaderMap, GraphSpaceID spaceId); - private: Snapshot(kvstore::KVStore* kv, AdminClient* client) : kv_(kv), client_(client) { executor_.reset(new folly::CPUThreadPoolExecutor(1)); @@ -42,6 +39,8 @@ class Snapshot { bool getAllSpaces(std::vector& spaces, kvstore::ResultCode& retCode); + std::set getHostsBySpace(GraphSpaceID space); + private: kvstore::KVStore* kv_{nullptr}; AdminClient* client_{nullptr}; From 617fb24b576b45c78c15ee8b186ee5b3c80e0ac0 Mon Sep 17 00:00:00 2001 From: bright-starry-sky Date: Thu, 13 Aug 2020 18:55:30 +0800 Subject: [PATCH 3/3] Addressed darionyaphet's comment --- src/meta/MetaServiceUtils.cpp | 7 +++ src/meta/MetaServiceUtils.h | 2 +- src/meta/processors/admin/SnapShot.cpp | 86 +++++++++----------------- src/meta/processors/admin/SnapShot.h | 4 +- 4 files changed, 39 insertions(+), 60 deletions(-) diff --git a/src/meta/MetaServiceUtils.cpp b/src/meta/MetaServiceUtils.cpp index a617fa91c2d..520553dd09f 100644 --- a/src/meta/MetaServiceUtils.cpp +++ b/src/meta/MetaServiceUtils.cpp @@ -113,6 +113,13 @@ std::string MetaServiceUtils::partPrefix(GraphSpaceID spaceId) { return prefix; } +std::string MetaServiceUtils::partPrefix() { + std::string prefix; + prefix.reserve(kPartsTable.size() + sizeof(GraphSpaceID)); + prefix.append(kPartsTable.data(), kPartsTable.size()); + return prefix; +} + std::vector MetaServiceUtils::parsePartVal(folly::StringPiece val) { std::vector hosts; static const size_t unitSize = sizeof(int32_t) * 2; diff --git a/src/meta/MetaServiceUtils.h b/src/meta/MetaServiceUtils.h index 671a40d8160..5abcf7ca97a 100644 --- a/src/meta/MetaServiceUtils.h +++ b/src/meta/MetaServiceUtils.h @@ -55,7 +55,7 @@ class MetaServiceUtils final { static std::string partVal(const std::vector& hosts); - static const std::string& partPrefix(); + static std::string partPrefix(); static std::string partPrefix(GraphSpaceID spaceId); diff --git a/src/meta/processors/admin/SnapShot.cpp b/src/meta/processors/admin/SnapShot.cpp index 9d3a092dd1f..d48ecf0af97 100644 --- a/src/meta/processors/admin/SnapShot.cpp +++ b/src/meta/processors/admin/SnapShot.cpp @@ -14,17 +14,14 @@ namespace nebula { namespace meta { cpp2::ErrorCode Snapshot::createSnapshot(const std::string& name) { - std::vector spaces; - kvstore::ResultCode ret = kvstore::ResultCode::SUCCEEDED; - if (!getAllSpaces(spaces, ret)) { - LOG(ERROR) << "Can't access kvstore, ret = d" - << static_cast(ret); + auto retSpacesHosts = getSpacesHosts(); + if (!retSpacesHosts.ok()) { return cpp2::ErrorCode::E_STORE_FAILURE; } - for (auto& space : spaces) { - auto hostsBySpace = getHostsBySpace(space); - for (const auto& host : hostsBySpace) { - auto status = client_->createSnapshot(space, name, host).get(); + auto spacesHosts = retSpacesHosts.value(); + for (const auto& spaceHosts : spacesHosts) { + for (const auto& host : spaceHosts.second) { + auto status = client_->createSnapshot(spaceHosts.first, name, host).get(); if (!status.ok()) { return cpp2::ErrorCode::E_RPC_FAILURE; } @@ -35,19 +32,15 @@ cpp2::ErrorCode Snapshot::createSnapshot(const std::string& name) { cpp2::ErrorCode Snapshot::dropSnapshot(const std::string& name, const std::vector& hosts) { - std::vector spaces; - kvstore::ResultCode ret = kvstore::ResultCode::SUCCEEDED; - if (!getAllSpaces(spaces, ret)) { - LOG(ERROR) << "Can't access kvstore, ret = d" - << static_cast(ret); + auto retSpacesHosts = getSpacesHosts(); + if (!retSpacesHosts.ok()) { return cpp2::ErrorCode::E_STORE_FAILURE; } - - for (const auto& space : spaces) { - auto hostsBySpace = getHostsBySpace(space); - for (const auto& host : hostsBySpace) { + auto spacesHosts = retSpacesHosts.value(); + for (const auto& spaceHosts : spacesHosts) { + for (const auto& host : spaceHosts.second) { if (std::find(hosts.begin(), hosts.end(), host) != hosts.end()) { - auto status = client_->dropSnapshot(space, name, host).get(); + auto status = client_->dropSnapshot(spaceHosts.first, name, host).get(); if (!status.ok()) { auto msg = "failed drop checkpoint : \"%s\". on host %s. error %s"; auto error = folly::stringPrintf(msg, @@ -62,62 +55,43 @@ cpp2::ErrorCode Snapshot::dropSnapshot(const std::string& name, return cpp2::ErrorCode::SUCCEEDED; } -bool Snapshot::getAllSpaces(std::vector& spaces, kvstore::ResultCode& retCode) { - // Get all spaces - folly::SharedMutex::ReadHolder rHolder(LockUtils::spaceLock()); - auto prefix = MetaServiceUtils::spacePrefix(); - std::unique_ptr iter; - auto ret = kv_->prefix(kDefaultSpaceId, kDefaultPartId, prefix, &iter); - if (ret != kvstore::ResultCode::SUCCEEDED) { - retCode = ret; - return false; - } - while (iter->valid()) { - auto spaceId = MetaServiceUtils::spaceId(iter->key()); - spaces.push_back(spaceId); - iter->next(); - } - return true; -} - cpp2::ErrorCode Snapshot::blockingWrites(storage::cpp2::EngineSignType sign) { - std::vector spaces; - kvstore::ResultCode ret = kvstore::ResultCode::SUCCEEDED; - if (!getAllSpaces(spaces, ret)) { - LOG(ERROR) << "Can't access kvstore, ret = d" - << static_cast(ret); + auto retSpacesHosts = getSpacesHosts(); + if (!retSpacesHosts.ok()) { return cpp2::ErrorCode::E_STORE_FAILURE; } - for (auto& space : spaces) { - auto hostsBySpace = getHostsBySpace(space); - for (const auto& host : hostsBySpace) { - auto status = client_->blockingWrites(space, sign, host).get(); - if (!status.ok()) { - LOG(ERROR) << " Send blocking sign error on host : " - << network::NetworkUtils::toHosts({host}); - } + auto spacesHosts = retSpacesHosts.value(); + for (const auto& spaceHosts : spacesHosts) { + for (const auto& host : spaceHosts.second) { + auto status = client_->blockingWrites(spaceHosts.first, sign, host).get(); + if (!status.ok()) { + LOG(ERROR) << " Send blocking sign error on host : " + << network::NetworkUtils::toHosts({host}); + } } } return cpp2::ErrorCode::SUCCEEDED; } -std::set Snapshot::getHostsBySpace(GraphSpaceID space) { +StatusOr>> Snapshot::getSpacesHosts() { folly::SharedMutex::ReadHolder rHolder(LockUtils::spaceLock()); - std::set hosts; - auto prefix = MetaServiceUtils::partPrefix(space); + std::map> hostsByspaces; + auto prefix = MetaServiceUtils::partPrefix(); std::unique_ptr iter; auto kvRet = kv_->prefix(kDefaultSpaceId, kDefaultPartId, prefix, &iter); if (kvRet != kvstore::ResultCode::SUCCEEDED) { - return {}; + LOG(ERROR) << "Get hosts meta data error"; + return Status::Error("Get hosts meta data error"); } while (iter->valid()) { auto partHosts = MetaServiceUtils::parsePartVal(iter->val()); + auto space = MetaServiceUtils::parsePartKeySpaceId(iter->key()); for (auto& ph : partHosts) { - hosts.emplace(HostAddr(ph.get_ip(), ph.get_port())); + hostsByspaces[space].emplace(HostAddr(ph.get_ip(), ph.get_port())); } iter->next(); } - return hosts; + return hostsByspaces; } } // namespace meta diff --git a/src/meta/processors/admin/SnapShot.h b/src/meta/processors/admin/SnapShot.h index 0f6c399d33d..bb28897ad75 100644 --- a/src/meta/processors/admin/SnapShot.h +++ b/src/meta/processors/admin/SnapShot.h @@ -37,9 +37,7 @@ class Snapshot { executor_.reset(new folly::CPUThreadPoolExecutor(1)); } - bool getAllSpaces(std::vector& spaces, kvstore::ResultCode& retCode); - - std::set getHostsBySpace(GraphSpaceID space); + StatusOr>> getSpacesHosts(); private: kvstore::KVStore* kv_{nullptr};