From 0199edee8e5f94340d853d7026efc4a161c5f87b Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Tue, 4 Apr 2023 18:12:34 +0800 Subject: [PATCH] fix: log error but not crash if found an incomplete replica path (#1428) https://github.com/apache/incubator-pegasus/issues/1383 The replica instance path will be moved to a trash path (a.k.a. `...err`), but the move may not complete if the replica server crashes; the path is then left behind while some files in it (e.g. `.init-info`) have already been moved. When the server is restarted after that, it crashes because of a check on the existence of those files. The check is not necessary: the server is able to trash the corrupt path and start normally, and the missing replica can be recovered from other servers automatically. This patch removes the fatal check and instead logs an error and returns `ERR_PATH_NOT_FOUND`. --- src/replica/replication_app_base.cpp | 5 ++++- src/replica/replication_app_base.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/replica/replication_app_base.cpp b/src/replica/replication_app_base.cpp index 5ae07b90bb..e7d3ef8ece 100644 --- a/src/replica/replication_app_base.cpp +++ b/src/replica/replication_app_base.cpp @@ -117,7 +117,10 @@ error_code write_blob_to_file(const std::string &file, const blob &data) error_code replica_init_info::load(const std::string &dir) { std::string info_path = utils::filesystem::path_combine(dir, kInitInfo); - CHECK(utils::filesystem::path_exists(info_path), "file({}) not exist", info_path); + ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::path_exists(info_path), + ERR_PATH_NOT_FOUND, + "file({}) not exist", + info_path); ERR_LOG_AND_RETURN_NOT_OK( load_json(info_path), "load replica_init_info from {} failed", info_path); LOG_INFO("load replica_init_info from {} succeed: {}", info_path, to_string()); diff --git a/src/replica/replication_app_base.h b/src/replica/replication_app_base.h index 29006a9569..b543a9111f 100644 --- a/src/replica/replication_app_base.h +++ b/src/replica/replication_app_base.h @@ -39,6 +39,7 @@ #include 
"replica/replica_base.h" #include "replica_admin_types.h" #include "utils/error_code.h" +#include "utils/ports.h" namespace dsn { class app_info; @@ -69,7 +70,7 @@ class replica_init_info public: replica_init_info() { memset((void *)this, 0, sizeof(*this)); } - error_code load(const std::string &dir); + error_code load(const std::string &dir) WARN_UNUSED_RESULT; error_code store(const std::string &dir); std::string to_string();