From 19f2bb749e143e96a0a49062406ad2df524ae897 Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Mon, 27 Dec 2021 16:52:48 +0800 Subject: [PATCH 1/8] format (#3569) --- src/graph/visitor/ExtractFilterExprVisitor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/graph/visitor/ExtractFilterExprVisitor.cpp b/src/graph/visitor/ExtractFilterExprVisitor.cpp index 5c362891fed..087109e2f3c 100644 --- a/src/graph/visitor/ExtractFilterExprVisitor.cpp +++ b/src/graph/visitor/ExtractFilterExprVisitor.cpp @@ -368,7 +368,9 @@ void ExtractFilterExprVisitor::visit(LogicalExpression *expr) { return; } -void ExtractFilterExprVisitor::visit(SubscriptRangeExpression *) { canBePushed_ = false; } +void ExtractFilterExprVisitor::visit(SubscriptRangeExpression *) { + canBePushed_ = false; +} } // namespace graph } // namespace nebula From e6fd4a9c3121f66e5a3218ff236b38398b630637 Mon Sep 17 00:00:00 2001 From: "hs.zhang" <22708345+cangfengzhs@users.noreply.github.com> Date: Mon, 27 Dec 2021 17:38:03 +0800 Subject: [PATCH 2/8] Insert vertex only (#3335) * add insert vertex only parser add insert vertex only validator fix grammar adjust insert vertex parser * add test * format * merge master * modify parser * add fetch test * format * fix fetch vertex without tag error Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- .../planner/ngql/FetchVerticesPlanner.cpp | 2 +- src/parser/parser.yy | 3 ++ .../features/insert/insertVertexOnly.feature | 40 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tests/tck/features/insert/insertVertexOnly.feature diff --git a/src/graph/planner/ngql/FetchVerticesPlanner.cpp b/src/graph/planner/ngql/FetchVerticesPlanner.cpp index 2e017817363..8e94bc12247 100644 --- a/src/graph/planner/ngql/FetchVerticesPlanner.cpp +++ b/src/graph/planner/ngql/FetchVerticesPlanner.cpp @@ -14,7 +14,7 @@ namespace graph { std::unique_ptr FetchVerticesPlanner::buildVertexProps( const ExpressionProps::TagIDPropsMap& propsMap) { if (propsMap.empty()) { - return nullptr; + return std::make_unique(); } auto vertexProps = std::make_unique(propsMap.size()); auto fun = [](auto& tag) { diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 6d4c02565d4..2783049482d 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -2916,6 +2916,9 @@ insert_vertex_sentence : KW_INSERT KW_VERTEX opt_if_not_exists opt_ignore_existed_index vertex_tag_list KW_VALUES vertex_row_list { $$ = new InsertVerticesSentence($5, $7, $3, $4); } + | KW_INSERT KW_VERTEX opt_if_not_exists opt_ignore_existed_index KW_VALUES vertex_row_list { + $$ = new InsertVerticesSentence(new VertexTagList(), $6, $3, $4); + } ; vertex_tag_list diff --git a/tests/tck/features/insert/insertVertexOnly.feature b/tests/tck/features/insert/insertVertexOnly.feature new file mode 100644 index 00000000000..f8eb92ec5f6 --- /dev/null +++ b/tests/tck/features/insert/insertVertexOnly.feature @@ -0,0 +1,40 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. 
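+#
+# This suite covers the tagless insert grammar added in parser.yy above:
+# `INSERT VERTEX VALUES vid:();` inserts a vertex that carries no tag, and the
+# FetchVerticesPlanner change lets `FETCH PROP ON *` return such vertices.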
+Feature: insert vertex without tag + + Background: Background name + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | int64 | + + Scenario: insert vertex only + Given having executed: + """ + CREATE EDGE e(); + """ + And wait 6 seconds + When executing query: + """ + INSERT VERTEX VALUES 1:(),2:(),3:(); + INSERT EDGE e() VALUES 1->2:(),2->3:(); + """ + Then the execution should be successful + When executing query: + """ + GO 2 STEP FROM 1 OVER e yield e._dst AS dst; + """ + Then the result should be, in any order: + | dst | + | 3 | + When executing query: + """ + FETCH PROP ON * 1,2 yield vertex AS v; + """ + Then the result should be, in any order, with relax comparison: + | v | + | (1) | + | (2) | + Then drop the used space From 301887e06defdec41a5ab5e771f1f9eb5dad59de Mon Sep 17 00:00:00 2001 From: Doodle <13706157+critical27@users.noreply.github.com> Date: Mon, 27 Dec 2021 04:03:00 -0600 Subject: [PATCH 3/8] [Raft] refactor processAppendLogRequest (#3435) * thrift done * processAppendLogRequest almost done * add some check * add committedLogTerm * make Part::cleanup atomic * remove useless wal rollback, pause Host when convert from leader to follower * fix pre_vote, step down as follower in all rpc * check wal valid * fix lastApplyLogId is set to -1 * format Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/interface/common.thrift | 2 +- src/interface/raftex.thrift | 70 ++--- src/kvstore/Listener.cpp | 14 +- src/kvstore/Listener.h | 11 +- src/kvstore/Part.cpp | 62 ++-- src/kvstore/Part.h | 5 +- src/kvstore/raftex/CMakeLists.txt | 1 + src/kvstore/raftex/Host.cpp | 142 ++++----- src/kvstore/raftex/Host.h | 18 +- src/kvstore/raftex/RaftLogIterator.cpp | 47 +++ src/kvstore/raftex/RaftLogIterator.h | 38 +++ src/kvstore/raftex/RaftPart.cpp | 367 +++++++++++++----------- src/kvstore/raftex/RaftPart.h | 23 +- src/kvstore/raftex/test/TestShard.cpp | 19 +- src/kvstore/raftex/test/TestShard.h | 5 +- src/kvstore/test/NebulaListenerTest.cpp | 3 +- src/kvstore/wal/AtomicLogBuffer.h | 9 +- src/kvstore/wal/WalFileIterator.cpp | 90 +++--- src/kvstore/wal/WalFileIterator.h | 4 + 19 files changed, 553 insertions(+), 377 deletions(-) create mode 100644 src/kvstore/raftex/RaftLogIterator.cpp create mode 100644 src/kvstore/raftex/RaftLogIterator.h diff --git a/src/interface/common.thrift b/src/interface/common.thrift index 3394c0f6bc6..22b1b8fcb17 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -254,7 +254,7 @@ struct CheckpointInfo { 3: binary path, } -// used for raft and drainer +// used for drainer struct LogEntry { 1: ClusterID cluster; 2: binary log_str; diff --git a/src/interface/raftex.thrift b/src/interface/raftex.thrift index 51000f86af7..7d002dd0f49 100644 --- a/src/interface/raftex.thrift +++ b/src/interface/raftex.thrift @@ -46,6 +46,8 @@ enum ErrorCode { E_PERSIST_SNAPSHOT_FAILED = -13; E_RPC_EXCEPTION = -14; // An thrift internal exception was thrown E_NO_WAL_FOUND = -15; + E_APPLY_FAIL = -16; + E_HOST_PAUSED = -17; } typedef i64 (cpp.type = "nebula::ClusterID") ClusterID @@ -72,56 +74,37 @@ struct AskForVoteRequest { // Response message for the vote call struct AskForVoteResponse { 1: ErrorCode error_code; + 2: TermID current_term; } +// Log entries being sent to follower, logId is not included, it could be calculated by +// last_log_id_sent and offset in log_str_list in AppendLogRequest +struct RaftLogEntry { + 1: ClusterID cluster; + 2: binary 
log_str; + 3: TermID log_term; +} -/* - AppendLogRequest serves two purposes: - - 1) Send a log message to followers and listeners - 2) Or, when log_id == 0 and len(log_str) == 0, it serves as a heartbeat -*/ struct AppendLogRequest { - // - // Fields 1 - 9 are common for both log appendent and heartbeat - // - // last_log_term_sent and last_log_id_sent are the term and log id - // for the last log being sent - // - 1: GraphSpaceID space; // Graphspace ID - 2: PartitionID part; // Partition ID - 3: TermID current_term; // Current term - 4: LogID last_log_id; // Last received log id - 5: LogID committed_log_id; // Last committed Log ID - 6: string leader_addr; // The leader's address - 7: Port leader_port; // The leader's Port - 8: TermID last_log_term_sent; - 9: LogID last_log_id_sent; - - // [deprecated]: heartbeat is moved to a separate interface - // - // Fields 10 to 11 are used for LogAppend. - // - // - // In the case of LogAppend, the id of the first log is the - // last_log_id_sent + 1 - // - // All logs in the log_str_list must belong to the same term, - // which specified by log_term - // - 10: TermID log_term; - 11: list log_str_list; + 1: GraphSpaceID space; // Graphspace ID + 2: PartitionID part; // Partition ID + 3: TermID current_term; // Current term + 4: LogID committed_log_id; // Last committed Log ID + 5: string leader_addr; // The leader's address + 6: Port leader_port; // The leader's Port + 7: TermID last_log_term_sent; // Term of log entry preceding log_str_list + 8: LogID last_log_id_sent; // Id of log entry preceding log_str_list + 9: list log_str_list; // First log id in log_str_list is last_log_id_sent + 1 } - struct AppendLogResponse { 1: ErrorCode error_code; 2: TermID current_term; 3: string leader_addr; 4: Port leader_port; 5: LogID committed_log_id; - 6: LogID last_log_id; - 7: TermID last_log_term; + 6: LogID last_matched_log_id; + 7: TermID last_matched_log_term; } struct SendSnapshotRequest { @@ -134,21 +117,14 @@ struct SendSnapshotRequest { 7: Port leader_port; 8: list rows; 9: i64 total_size; - 10: i64 total_count; - 11: bool done; + 10: i64 total_count; + 11: bool done; } struct HeartbeatRequest { - // - // Fields 1 - 9 are common for both log appendent and heartbeat - // - // last_log_term_sent and last_log_id_sent are the term and log id - // for the last log being sent - // 1: GraphSpaceID space; // Graphspace ID 2: PartitionID part; // Partition ID 3: TermID current_term; // Current term - 4: LogID last_log_id; // Last received log id 5: LogID committed_log_id; // Last committed Log ID 6: string leader_addr; // The leader's address 7: Port leader_port; // The leader's Port diff --git a/src/kvstore/Listener.cpp b/src/kvstore/Listener.cpp index 65d2e632cbd..8e5129dfb56 100644 --- a/src/kvstore/Listener.cpp +++ b/src/kvstore/Listener.cpp @@ -56,12 +56,13 @@ void Listener::start(std::vector&& peers, bool) { auto logIdAndTerm = lastCommittedLogId(); committedLogId_ = logIdAndTerm.first; + committedLogTerm_ = logIdAndTerm.second; if (lastLogId_ < committedLogId_) { LOG(INFO) << idStr_ << "Reset lastLogId " << lastLogId_ << " to be the committedLogId " << committedLogId_; lastLogId_ = committedLogId_; - lastLogTerm_ = term_; + lastLogTerm_ = committedLogTerm_; wal_->reset(); } @@ -122,16 +123,19 @@ bool Listener::preProcessLog(LogID logId, return true; } -cpp2::ErrorCode Listener::commitLogs(std::unique_ptr iter, bool) { - LogID lastId = -1; +std::tuple Listener::commitLogs( + std::unique_ptr iter, bool) { + LogID lastId = kNoCommitLogId; + TermID 
lastTerm = kNoCommitLogTerm; while (iter->valid()) { lastId = iter->logId(); + lastTerm = iter->logTerm(); ++(*iter); } if (lastId > 0) { leaderCommitId_ = lastId; } - return cpp2::ErrorCode::SUCCEEDED; + return {cpp2::ErrorCode::SUCCEEDED, lastId, lastTerm}; } void Listener::doApply() { @@ -216,7 +220,7 @@ void Listener::doApply() { } // apply to state machine - if (apply(data)) { + if (lastApplyId != -1 && apply(data)) { std::lock_guard guard(raftLock_); lastApplyLogId_ = lastApplyId; persist(committedLogId_, term_, lastApplyLogId_); diff --git a/src/kvstore/Listener.h b/src/kvstore/Listener.h index 1252ff894af..403af197b14 100644 --- a/src/kvstore/Listener.h +++ b/src/kvstore/Listener.h @@ -51,7 +51,8 @@ using RaftClient = thrift::ThriftClientManager iter, bool) + * std::tuple + * commitLogs(std::unique_ptr iter, bool) * * // For most of the listeners, just return true is enough. However, if listener need to be * // aware of membership change, some log type of wal need to be pre-processed, could do it @@ -84,7 +85,7 @@ using RaftClient = thrift::ThriftClientManager, bool) override; + std::tuple commitLogs(std::unique_ptr, + bool) override; // For most of the listeners, just return true is enough. However, if listener need to be aware // of membership change, some log type of wal need to be pre-processed, could do it here. diff --git a/src/kvstore/Part.cpp b/src/kvstore/Part.cpp index 00ee16768e9..ec10f7c9ed7 100644 --- a/src/kvstore/Part.cpp +++ b/src/kvstore/Part.cpp @@ -219,11 +219,12 @@ void Part::onDiscoverNewLeader(HostAddr nLeader) { } } -cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { +std::tuple Part::commitLogs( + std::unique_ptr iter, bool wait) { SCOPED_TIMER(&execTime_); auto batch = engine_->startBatchWrite(); - LogID lastId = -1; - TermID lastTerm = -1; + LogID lastId = kNoCommitLogId; + TermID lastTerm = kNoCommitLogTerm; while (iter->valid()) { lastId = iter->logId(); lastTerm = iter->logTerm(); @@ -242,7 +243,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = batch->put(pieces[0], pieces[1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch::put()"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } break; } @@ -256,7 +257,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = batch->put(kvs[i], kvs[i + 1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch::put()"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } } break; @@ -266,7 +267,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = batch->remove(key); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch::remove()"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } break; } @@ -276,7 +277,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = batch->remove(k); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch::remove()"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } } break; @@ -287,7 +288,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = batch->removeRange(range[0], range[1]); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch::removeRange()"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } 
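        // As with every other early return above, a write failure is reported
        // as {code, kNoCommitLogId, kNoCommitLogTerm} so that RaftPart can tell
        // that nothing from this batch was applied to the state machine.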
break; } @@ -306,7 +307,7 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { } if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Failed to call WriteBatch"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } } break; @@ -351,11 +352,17 @@ cpp2::ErrorCode Part::commitLogs(std::unique_ptr iter, bool wait) { auto code = putCommitMsg(batch.get(), lastId, lastTerm); if (code != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Commit msg failed"; - return code; + return {code, kNoCommitLogId, kNoCommitLogTerm}; } } - return engine_->commitBatchWrite( + + auto code = engine_->commitBatchWrite( std::move(batch), FLAGS_rocksdb_disable_wal, FLAGS_rocksdb_wal_sync, wait); + if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { + return {code, lastId, lastTerm}; + } else { + return {code, kNoCommitLogId, kNoCommitLogTerm}; + } } std::pair Part::commitSnapshot(const std::vector& rows, @@ -463,50 +470,53 @@ bool Part::preProcessLog(LogID logId, TermID termId, ClusterID clusterId, const return true; } -void Part::cleanup() { +nebula::cpp2::ErrorCode Part::cleanup() { LOG(INFO) << idStr_ << "Clean rocksdb part data"; + auto batch = engine_->startBatchWrite(); // Remove the vertex, edge, index, systemCommitKey, operation data under the part const auto& vertexPre = NebulaKeyUtils::tagPrefix(partId_); - auto ret = engine_->removeRange(NebulaKeyUtils::firstKey(vertexPre, vIdLen_), - NebulaKeyUtils::lastKey(vertexPre, vIdLen_)); + auto ret = batch->removeRange(NebulaKeyUtils::firstKey(vertexPre, vIdLen_), + NebulaKeyUtils::lastKey(vertexPre, vIdLen_)); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Remove the part vertex data failed, error " << static_cast(ret); - return; + return ret; } const auto& edgePre = NebulaKeyUtils::edgePrefix(partId_); - ret = engine_->removeRange(NebulaKeyUtils::firstKey(edgePre, vIdLen_), - NebulaKeyUtils::lastKey(edgePre, vIdLen_)); + ret = batch->removeRange(NebulaKeyUtils::firstKey(edgePre, vIdLen_), + NebulaKeyUtils::lastKey(edgePre, vIdLen_)); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Remove the part edge data failed, error" << static_cast(ret); - return; + return ret; } const auto& indexPre = IndexKeyUtils::indexPrefix(partId_); - ret = engine_->removeRange(NebulaKeyUtils::firstKey(indexPre, sizeof(IndexID)), - NebulaKeyUtils::lastKey(indexPre, sizeof(IndexID))); + ret = batch->removeRange(NebulaKeyUtils::firstKey(indexPre, sizeof(IndexID)), + NebulaKeyUtils::lastKey(indexPre, sizeof(IndexID))); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Remove the part index data failed, error " << static_cast(ret); - return; + return ret; } const auto& operationPre = OperationKeyUtils::operationPrefix(partId_); - ret = engine_->removeRange(NebulaKeyUtils::firstKey(operationPre, sizeof(int64_t)), - NebulaKeyUtils::lastKey(operationPre, sizeof(int64_t))); + ret = batch->removeRange(NebulaKeyUtils::firstKey(operationPre, sizeof(int64_t)), + NebulaKeyUtils::lastKey(operationPre, sizeof(int64_t))); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Remove the part operation data failed, error " << static_cast(ret); - return; + return ret; } - ret = engine_->remove(NebulaKeyUtils::systemCommitKey(partId_)); + ret = batch->remove(NebulaKeyUtils::systemCommitKey(partId_)); if (ret != nebula::cpp2::ErrorCode::SUCCEEDED) { LOG(ERROR) << idStr_ << "Remove the part system commit data failed, error " << static_cast(ret); 
+ return ret; } - return; + return engine_->commitBatchWrite( + std::move(batch), FLAGS_rocksdb_disable_wal, FLAGS_rocksdb_wal_sync, true); } // TODO(pandasheep) unify raft errorcode diff --git a/src/kvstore/Part.h b/src/kvstore/Part.h index d634528288c..346776b3a13 100644 --- a/src/kvstore/Part.h +++ b/src/kvstore/Part.h @@ -96,7 +96,8 @@ class Part : public raftex::RaftPart { void onDiscoverNewLeader(HostAddr nLeader) override; - cpp2::ErrorCode commitLogs(std::unique_ptr iter, bool wait) override; + std::tuple commitLogs(std::unique_ptr iter, + bool wait) override; bool preProcessLog(LogID logId, TermID termId, @@ -112,7 +113,7 @@ class Part : public raftex::RaftPart { LogID committedLogId, TermID committedLogTerm); - void cleanup() override; + nebula::cpp2::ErrorCode cleanup() override; nebula::cpp2::ErrorCode toResultCode(raftex::AppendLogResult res); diff --git a/src/kvstore/raftex/CMakeLists.txt b/src/kvstore/raftex/CMakeLists.txt index 4056e4a47b8..6e2910c869c 100644 --- a/src/kvstore/raftex/CMakeLists.txt +++ b/src/kvstore/raftex/CMakeLists.txt @@ -1,5 +1,6 @@ nebula_add_library( raftex_obj OBJECT + RaftLogIterator.cpp RaftPart.cpp RaftexService.cpp Host.cpp diff --git a/src/kvstore/raftex/Host.cpp b/src/kvstore/raftex/Host.cpp index 088d9fc118e..17dbe1e7c1d 100644 --- a/src/kvstore/raftex/Host.cpp +++ b/src/kvstore/raftex/Host.cpp @@ -17,7 +17,7 @@ DEFINE_uint32(max_appendlog_batch_size, 128, "The max number of logs in each appendLog request batch"); DEFINE_uint32(max_outstanding_requests, 1024, "The max number of outstanding appendLog requests"); -DEFINE_int32(raft_rpc_timeout_ms, 500, "rpc timeout for raft client"); +DEFINE_int32(raft_rpc_timeout_ms, 1000, "rpc timeout for raft client"); DECLARE_bool(trace_raft); DECLARE_uint32(raft_heartbeat_interval_secs); @@ -43,13 +43,17 @@ void Host::waitForStop() { LOG(INFO) << idStr_ << "The host has been stopped!"; } -cpp2::ErrorCode Host::checkStatus() const { +cpp2::ErrorCode Host::canAppendLog() const { CHECK(!lock_.try_lock()); if (stopped_) { VLOG(2) << idStr_ << "The host is stopped, just return"; return cpp2::ErrorCode::E_HOST_STOPPED; } + if (paused_) { + VLOG(2) << idStr_ << "The host is paused, due to losing leadership"; + return cpp2::ErrorCode::E_HOST_PAUSED; + } return cpp2::ErrorCode::SUCCEEDED; } @@ -57,11 +61,10 @@ folly::Future Host::askForVote(const cpp2::AskForVoteR folly::EventBase* eb) { { std::lock_guard g(lock_); - auto res = checkStatus(); - if (res != cpp2::ErrorCode::SUCCEEDED) { + if (stopped_) { VLOG(2) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::AskForVoteResponse resp; - resp.error_code_ref() = res; + resp.error_code_ref() = cpp2::ErrorCode::E_HOST_STOPPED; return resp; } } @@ -82,7 +85,7 @@ folly::Future Host::appendLogs(folly::EventBase* eb, { std::lock_guard g(lock_); - auto res = checkStatus(); + auto res = canAppendLog(); if (UNLIKELY(sendingSnapshot_)) { LOG_EVERY_N(INFO, 500) << idStr_ << "The target host is waiting for a snapshot"; @@ -161,10 +164,9 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptridStr_ << "AppendLogResponse " << "code " << apache::thrift::util::enumNameSafe(resp.get_error_code()) << ", currTerm " - << resp.get_current_term() << ", lastLogId " << resp.get_last_log_id() - << ", lastLogTerm " << resp.get_last_log_term() << ", commitLogId " - << resp.get_committed_log_id() << ", lastLogIdSent_ " << self->lastLogIdSent_ - << ", lastLogTermSent_ " << self->lastLogTermSent_; + << resp.get_current_term() << ", lastLogTerm " << 
resp.get_last_matched_log_term() + << ", commitLogId " << resp.get_committed_log_id() << ", lastLogIdSent_ " + << self->lastLogIdSent_ << ", lastLogTermSent_ " << self->lastLogTermSent_; switch (resp.get_error_code()) { case cpp2::ErrorCode::SUCCEEDED: case cpp2::ErrorCode::E_LOG_GAP: @@ -174,7 +176,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr newReq; { std::lock_guard g(self->lock_); - auto res = self->checkStatus(); + auto res = self->canAppendLog(); if (res != cpp2::ErrorCode::SUCCEEDED) { cpp2::AppendLogResponse r; r.error_code_ref() = res; @@ -182,8 +184,8 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptrlastLogIdSent_ = resp.get_last_log_id(); - self->lastLogTermSent_ = resp.get_last_log_term(); + self->lastLogIdSent_ = resp.get_last_matched_log_id(); + self->lastLogTermSent_ = resp.get_last_matched_log_term(); self->followerCommittedLogId_ = resp.get_committed_log_id(); if (self->lastLogIdSent_ < self->logIdToSend_) { // More to send @@ -198,7 +200,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptr= self->logIdToSend_ + // resp.get_last_matched_log_id() >= self->logIdToSend_ // All logs up to logIdToSend_ has been sent, fulfill the promise self->promise_.setValue(resp); // Check if there are any pending request: @@ -237,8 +239,7 @@ void Host::appendLogsInternal(folly::EventBase* eb, std::shared_ptridStr_ << "append log time out" << ", space " << req->get_space() << ", part " << req->get_part() - << ", current term " << req->get_current_term() << ", last_log_id " - << req->get_last_log_id() << ", committed_id " + << ", current term " << req->get_current_term() << ", committed_id " << req->get_committed_log_id() << ", last_log_term_sent " << req->get_last_log_term_sent() << ", last_log_id_sent " << req->get_last_log_id_sent() << ", set lastLogIdSent_ to logIdToSend_ " @@ -267,6 +268,12 @@ ErrorOr> Host::prepareA CHECK(!lock_.try_lock()); VLOG(2) << idStr_ << "Prepare AppendLogs request from Log " << lastLogIdSent_ + 1 << " to " << logIdToSend_; + + // We need to use lastLogIdSent_ + 1 to check whether need to send snapshot + if (UNLIKELY(lastLogIdSent_ + 1 < part_->wal()->firstLogId())) { + return startSendSnapshot(); + } + if (lastLogIdSent_ + 1 > part_->wal()->lastLogId()) { LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "My lastLogId in wal is " << part_->wal()->lastLogId() @@ -274,62 +281,71 @@ ErrorOr> Host::prepareA << ", so i have nothing to send, logIdToSend_ = " << logIdToSend_; return cpp2::ErrorCode::E_NO_WAL_FOUND; } + auto it = part_->wal()->iterator(lastLogIdSent_ + 1, logIdToSend_); if (it->valid()) { - auto term = it->logTerm(); auto req = std::make_shared(); req->space_ref() = part_->spaceId(); req->part_ref() = part_->partitionId(); req->current_term_ref() = logTermToSend_; - req->last_log_id_ref() = logIdToSend_; + req->committed_log_id_ref() = committedLogId_; req->leader_addr_ref() = part_->address().host; req->leader_port_ref() = part_->address().port; - req->committed_log_id_ref() = committedLogId_; req->last_log_term_sent_ref() = lastLogTermSent_; req->last_log_id_sent_ref() = lastLogIdSent_; - req->log_term_ref() = term; - - std::vector logs; - for (size_t cnt = 0; - it->valid() && it->logTerm() == term && cnt < FLAGS_max_appendlog_batch_size; - ++(*it), ++cnt) { - nebula::cpp2::LogEntry le; - le.cluster_ref() = it->logSource(); - le.log_str_ref() = it->logMsg().toString(); - logs.emplace_back(std::move(le)); + + std::vector logs; + for (size_t cnt = 0; it->valid() && cnt < 
FLAGS_max_appendlog_batch_size; ++(*it), ++cnt) { + cpp2::RaftLogEntry entry; + entry.cluster_ref() = it->logSource(); + entry.log_str_ref() = it->logMsg().toString(); + entry.log_term_ref() = it->logTerm(); + logs.emplace_back(std::move(entry)); + } + // the last log entry's id is (lastLogIdSent_ + cnt), when iterator is invalid and last log + // entry's id is not logIdToSend_, which means the log has been rollbacked + if (!it->valid() && (lastLogIdSent_ + static_cast(logs.size()) != logIdToSend_)) { + LOG_IF(INFO, FLAGS_trace_raft) + << idStr_ << "Can't find log in wal, logIdToSend_ = " << logIdToSend_; + return cpp2::ErrorCode::E_NO_WAL_FOUND; } req->log_str_list_ref() = std::move(logs); return req; } else { - if (!sendingSnapshot_) { - LOG(INFO) << idStr_ << "Can't find log " << lastLogIdSent_ + 1 << " in wal, send the snapshot" - << ", logIdToSend = " << logIdToSend_ - << ", firstLogId in wal = " << part_->wal()->firstLogId() - << ", lastLogId in wal = " << part_->wal()->lastLogId(); - sendingSnapshot_ = true; - part_->snapshot_->sendSnapshot(part_, addr_) - .thenValue([self = shared_from_this()](auto&& status) { - std::lock_guard g(self->lock_); - if (status.ok()) { - auto commitLogIdAndTerm = status.value(); - self->lastLogIdSent_ = commitLogIdAndTerm.first; - self->lastLogTermSent_ = commitLogIdAndTerm.second; - self->followerCommittedLogId_ = commitLogIdAndTerm.first; - LOG(INFO) << self->idStr_ << "Send snapshot succeeded!" - << " commitLogId = " << commitLogIdAndTerm.first - << " commitLogTerm = " << commitLogIdAndTerm.second; - } else { - LOG(INFO) << self->idStr_ << "Send snapshot failed!"; - // TODO(heng): we should tell the follower i am failed. - } - self->sendingSnapshot_ = false; - self->noMoreRequestCV_.notify_all(); - }); - } else { - LOG_EVERY_N(INFO, 100) << idStr_ << "The snapshot req is in queue, please wait for a moment"; - } - return cpp2::ErrorCode::E_WAITING_SNAPSHOT; + return cpp2::ErrorCode::E_NO_WAL_FOUND; + } +} + +cpp2::ErrorCode Host::startSendSnapshot() { + CHECK(!lock_.try_lock()); + if (!sendingSnapshot_) { + LOG(INFO) << idStr_ << "Can't find log " << lastLogIdSent_ + 1 << " in wal, send the snapshot" + << ", logIdToSend = " << logIdToSend_ + << ", firstLogId in wal = " << part_->wal()->firstLogId() + << ", lastLogId in wal = " << part_->wal()->lastLogId(); + sendingSnapshot_ = true; + part_->snapshot_->sendSnapshot(part_, addr_) + .thenValue([self = shared_from_this()](auto&& status) { + std::lock_guard g(self->lock_); + if (status.ok()) { + auto commitLogIdAndTerm = status.value(); + self->lastLogIdSent_ = commitLogIdAndTerm.first; + self->lastLogTermSent_ = commitLogIdAndTerm.second; + self->followerCommittedLogId_ = commitLogIdAndTerm.first; + LOG(INFO) << self->idStr_ << "Send snapshot succeeded!" + << " commitLogId = " << commitLogIdAndTerm.first + << " commitLogTerm = " << commitLogIdAndTerm.second; + } else { + LOG(INFO) << self->idStr_ << "Send snapshot failed!"; + // TODO(heng): we should tell the follower i am failed. 
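+              // For now the failure is only handled indirectly: sendingSnapshot_
+              // is cleared below, and since lastLogIdSent_ was not advanced, the
+              // next prepareAppendLogRequest will again see lastLogIdSent_ + 1
+              // below the first log id in the wal and retry the snapshot.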
+ } + self->sendingSnapshot_ = false; + self->noMoreRequestCV_.notify_all(); + }); + } else { + LOG_EVERY_N(INFO, 100) << idStr_ << "The snapshot req is in queue, please wait for a moment"; } + return cpp2::ErrorCode::E_WAITING_SNAPSHOT; } folly::Future Host::sendAppendLogRequest( @@ -338,7 +354,7 @@ folly::Future Host::sendAppendLogRequest( { std::lock_guard g(lock_); - auto res = checkStatus(); + auto res = canAppendLog(); if (res != cpp2::ErrorCode::SUCCEEDED) { LOG(WARNING) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::AppendLogResponse resp; @@ -349,8 +365,7 @@ folly::Future Host::sendAppendLogRequest( LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Sending appendLog: space " << req->get_space() << ", part " << req->get_part() << ", current term " - << req->get_current_term() << ", last_log_id " - << req->get_last_log_id() << ", committed_id " + << req->get_current_term() << ", committed_id " << req->get_committed_log_id() << ", last_log_term_sent " << req->get_last_log_term_sent() << ", last_log_id_sent " << req->get_last_log_id_sent() << ", logs in request " @@ -362,7 +377,6 @@ folly::Future Host::sendAppendLogRequest( folly::Future Host::sendHeartbeat(folly::EventBase* eb, TermID term, - LogID latestLogId, LogID commitLogId, TermID lastLogTerm, LogID lastLogId) { @@ -370,7 +384,6 @@ folly::Future Host::sendHeartbeat(folly::EventBase* eb, req->space_ref() = part_->spaceId(); req->part_ref() = part_->partitionId(); req->current_term_ref() = term; - req->last_log_id_ref() = latestLogId; req->committed_log_id_ref() = commitLogId; req->leader_addr_ref() = part_->address().host; req->leader_port_ref() = part_->address().port; @@ -401,7 +414,7 @@ folly::Future Host::sendHeartbeatRequest( { std::lock_guard g(lock_); - auto res = checkStatus(); + auto res = canAppendLog(); if (res != cpp2::ErrorCode::SUCCEEDED) { LOG(WARNING) << idStr_ << "The Host is not in a proper status, do not send"; cpp2::HeartbeatResponse resp; @@ -412,8 +425,7 @@ folly::Future Host::sendHeartbeatRequest( LOG_IF(INFO, FLAGS_trace_raft) << idStr_ << "Sending heartbeat: space " << req->get_space() << ", part " << req->get_part() << ", current term " - << req->get_current_term() << ", last_log_id " - << req->get_last_log_id() << ", committed_id " + << req->get_current_term() << ", committed_id " << req->get_committed_log_id() << ", last_log_term_sent " << req->get_last_log_term_sent() << ", last_log_id_sent " << req->get_last_log_id_sent(); diff --git a/src/kvstore/raftex/Host.h b/src/kvstore/raftex/Host.h index d622b4a9b0b..c0d8b6633f1 100644 --- a/src/kvstore/raftex/Host.h +++ b/src/kvstore/raftex/Host.h @@ -38,6 +38,18 @@ class Host final : public std::enable_shared_from_this { return idStr_.c_str(); } + // This will be called when the shard lost its leadership + void pause() { + std::lock_guard g(lock_); + paused_ = true; + } + + // This will be called when the shard becomes the leader + void resume() { + std::lock_guard g(lock_); + paused_ = false; + } + void stop() { std::lock_guard g(lock_); stopped_ = true; @@ -79,7 +91,6 @@ class Host final : public std::enable_shared_from_this { folly::Future sendHeartbeat(folly::EventBase* eb, TermID term, - LogID latestLogId, LogID commitLogId, TermID lastLogTerm, LogID lastLogId); @@ -89,7 +100,7 @@ class Host final : public std::enable_shared_from_this { } private: - cpp2::ErrorCode checkStatus() const; + cpp2::ErrorCode canAppendLog() const; folly::Future sendAppendLogRequest( folly::EventBase* eb, std::shared_ptr req); @@ -101,6 +112,8 @@ class Host 
final : public std::enable_shared_from_this { ErrorOr> prepareAppendLogRequest(); + cpp2::ErrorCode startSendSnapshot(); + bool noRequest() const; void setResponse(const cpp2::AppendLogResponse& r); @@ -118,6 +131,7 @@ class Host final : public std::enable_shared_from_this { mutable std::mutex lock_; + bool paused_{false}; bool stopped_{false}; // whether there is a batch of logs for target host in on going diff --git a/src/kvstore/raftex/RaftLogIterator.cpp b/src/kvstore/raftex/RaftLogIterator.cpp new file mode 100644 index 00000000000..2fa987ff9ed --- /dev/null +++ b/src/kvstore/raftex/RaftLogIterator.cpp @@ -0,0 +1,47 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "kvstore/raftex/RaftLogIterator.h" + +#include "common/base/Base.h" +#include "common/thrift/ThriftTypes.h" + +namespace nebula { +namespace raftex { + +RaftLogIterator::RaftLogIterator(LogID firstLogId, std::vector logEntries) + : idx_(0), firstLogId_(firstLogId), logEntries_(std::move(logEntries)) {} + +RaftLogIterator& RaftLogIterator::operator++() { + ++idx_; + return *this; +} + +bool RaftLogIterator::valid() const { + return idx_ < logEntries_.size(); +} + +LogID RaftLogIterator::logId() const { + DCHECK(valid()); + return firstLogId_ + idx_; +} + +TermID RaftLogIterator::logTerm() const { + DCHECK(valid()); + return logEntries_.at(idx_).get_log_term(); +} + +ClusterID RaftLogIterator::logSource() const { + DCHECK(valid()); + return logEntries_.at(idx_).get_cluster(); +} + +folly::StringPiece RaftLogIterator::logMsg() const { + DCHECK(valid()); + return logEntries_.at(idx_).get_log_str(); +} + +} // namespace raftex +} // namespace nebula diff --git a/src/kvstore/raftex/RaftLogIterator.h b/src/kvstore/raftex/RaftLogIterator.h new file mode 100644 index 00000000000..95fc0955170 --- /dev/null +++ b/src/kvstore/raftex/RaftLogIterator.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#pragma once + +#include "common/base/Base.h" +#include "common/utils/LogIterator.h" +#include "interface/gen-cpp2/raftex_types.h" + +namespace nebula { +namespace raftex { + +class RaftLogIterator final : public LogIterator { + public: + RaftLogIterator(LogID firstLogId, std::vector logEntries); + + RaftLogIterator& operator++() override; + + bool valid() const override; + + LogID logId() const override; + + TermID logTerm() const override; + + ClusterID logSource() const override; + + folly::StringPiece logMsg() const override; + + private: + size_t idx_; + const LogID firstLogId_; + std::vector logEntries_; +}; + +} // namespace raftex +} // namespace nebula diff --git a/src/kvstore/raftex/RaftPart.cpp b/src/kvstore/raftex/RaftPart.cpp index 4b0678935bc..f8571c775b4 100644 --- a/src/kvstore/raftex/RaftPart.cpp +++ b/src/kvstore/raftex/RaftPart.cpp @@ -22,6 +22,7 @@ #include "interface/gen-cpp2/RaftexServiceAsyncClient.h" #include "kvstore/LogEncoder.h" #include "kvstore/raftex/Host.h" +#include "kvstore/raftex/RaftLogIterator.h" #include "kvstore/stats/KVStats.h" #include "kvstore/wal/FileBasedWal.h" @@ -286,18 +287,20 @@ void RaftPart::start(std::vector&& peers, bool asLearner) { auto logIdAndTerm = lastCommittedLogId(); committedLogId_ = logIdAndTerm.first; + committedLogTerm_ = logIdAndTerm.second; if (lastLogId_ < committedLogId_) { LOG(INFO) << idStr_ << "Reset lastLogId " << lastLogId_ << " to be the committedLogId " << committedLogId_; lastLogId_ = committedLogId_; - lastLogTerm_ = logIdAndTerm.second; + lastLogTerm_ = committedLogTerm_; wal_->reset(); } LOG(INFO) << idStr_ << "There are " << peers.size() << " peer hosts, and total " << peers.size() + 1 << " copies. The quorum is " << quorum_ + 1 << ", as learner " << asLearner << ", lastLogId " << lastLogId_ << ", lastLogTerm " << lastLogTerm_ - << ", committedLogId " << committedLogId_ << ", term " << term_; + << ", committedLogId " << committedLogId_ << ", committedLogTerm " << committedLogTerm_ + << ", term " << term_; // Start all peer hosts for (auto& addr : peers) { @@ -428,6 +431,9 @@ void RaftPart::commitTransLeader(const HostAddr& target) { lastMsgRecvDur_.reset(); role_ = Role::FOLLOWER; leader_ = HostAddr("", 0); + for (auto& host : hosts_) { + host->pause(); + } LOG(INFO) << idStr_ << "Give up my leadership!"; } } else { @@ -738,7 +744,8 @@ void RaftPart::appendLogsInternal(AppendLogsIterator iter, TermID termId) { if (!wal_->appendLogs(iter)) { LOG_EVERY_N(WARNING, 100) << idStr_ << "Failed to write into WAL"; res = AppendLogResult::E_WAL_FAILURE; - wal_->rollbackToLog(lastLogId_); + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); break; } lastId = wal_->lastLogId(); @@ -774,7 +781,8 @@ void RaftPart::replicateLogs(folly::EventBase* eb, std::lock_guard g(raftLock_); res = canAppendLogs(currTerm); if (res != AppendLogResult::SUCCEEDED) { - wal_->rollbackToLog(lastLogId_); + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); break; } hosts = hosts_; @@ -854,11 +862,29 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, std::vector> hosts) { // Make sure majority have succeeded size_t numSucceeded = 0; + TermID highestTerm = currTerm; for (auto& res : resps) { if (!hosts[res.first]->isLearner() && res.second.get_error_code() == cpp2::ErrorCode::SUCCEEDED) { ++numSucceeded; } + highestTerm = std::max(highestTerm, res.second.get_current_term()); + } + + AppendLogResult res = AppendLogResult::SUCCEEDED; + { + std::lock_guard g(raftLock_); + if 
(highestTerm > term_) { + term_ = highestTerm; + role_ = Role::FOLLOWER; + leader_ = HostAddr("", 0); + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); + res = AppendLogResult::E_TERM_OUT_OF_DATE; + } + } + if (!checkAppendLogResult(res)) { + return; } if (numSucceeded >= quorum_) { @@ -866,12 +892,12 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, VLOG(2) << idStr_ << numSucceeded << " hosts have accepted the logs"; LogID firstLogId = 0; - AppendLogResult res = AppendLogResult::SUCCEEDED; do { std::lock_guard g(raftLock_); res = canAppendLogs(currTerm); if (res != AppendLogResult::SUCCEEDED) { - wal_->rollbackToLog(lastLogId_); + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); break; } lastLogId_ = lastLogId; @@ -889,10 +915,13 @@ void RaftPart::processAppendLogResponses(const AppendLogResponses& resps, auto walIt = wal_->iterator(committedId + 1, lastLogId); SlowOpTracker tracker; // Step 3: Commit the batch - if (commitLogs(std::move(walIt), true) == nebula::cpp2::ErrorCode::SUCCEEDED) { + auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), true); + if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { stats::StatsManager::addValue(kCommitLogLatencyUs, execTime_); std::lock_guard g(raftLock_); - committedLogId_ = lastLogId; + CHECK_EQ(lastLogId, lastCommitId); + committedLogId_ = lastCommitId; + committedLogTerm_ = lastCommitTerm; firstLogId = lastLogId_ + 1; lastMsgAcceptedCostMs_ = lastMsgSentDur_.elapsedInMSec(); lastMsgAcceptedTime_ = time::WallClock::fastNowInMilliSec(); @@ -1011,6 +1040,7 @@ bool RaftPart::prepareElectionRequest(cpp2::AskForVoteRequest& req, req.part_ref() = partId_; req.candidate_addr_ref() = addr_.host; req.candidate_port_ref() = addr_.port; + req.is_pre_vote_ref() = isPreVote; // Use term_ + 1 to check if peers would vote for me in prevote. // Only increase the term when prevote succeeeded. if (isPreVote) { @@ -1068,6 +1098,8 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul return false; } + CHECK(role_ == Role::CANDIDATE); + // term changed during actual leader election if (!isPreVote && proposedTerm != term_) { LOG(INFO) << idStr_ << "Partition's term has changed during election, " @@ -1077,6 +1109,7 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul } size_t numSucceeded = 0; + TermID highestTerm = isPreVote ? 
proposedTerm - 1 : proposedTerm; for (auto& r : results) { if (r.second.get_error_code() == cpp2::ErrorCode::SUCCEEDED) { ++numSucceeded; @@ -1086,9 +1119,15 @@ bool RaftPart::processElectionResponses(const RaftPart::ElectionResponses& resul << apache::thrift::util::enumNameSafe(r.second.get_error_code()) << ", isPreVote = " << isPreVote; } + highestTerm = std::max(highestTerm, r.second.get_current_term()); } - CHECK(role_ == Role::CANDIDATE); + if (highestTerm > term_) { + term_ = highestTerm; + role_ = Role::FOLLOWER; + leader_ = HostAddr("", 0); + return false; + } if (numSucceeded >= quorum_) { if (isPreVote) { @@ -1176,7 +1215,6 @@ folly::Future RaftPart::leaderElection(bool isPreVote) { VLOG(2) << self->idStr_ << "AskForVoteRequest has been sent to all peers, waiting for responses"; CHECK(!t.hasException()); - self->inElection_ = false; pro.setValue( self->handleElectionResponses(t.value(), std::move(hosts), proposedTerm, isPreVote)); }); @@ -1206,9 +1244,11 @@ bool RaftPart::handleElectionResponses(const ElectionResponses& resps, // reset host can't be executed with raftLock_, otherwise it may encounter deadlock for (auto& host : hosts) { host->reset(); + host->resume(); } sendHeartbeat(); } + inElection_ = false; return elected; } @@ -1222,7 +1262,7 @@ void RaftPart::statusPolling(int64_t startTime) { return; } } - size_t delay = FLAGS_raft_heartbeat_interval_secs * 1000 / 3 + +folly::Random::rand32(500); + size_t delay = FLAGS_raft_heartbeat_interval_secs * 1000 / 3 + folly::Random::rand32(500); if (needToStartElection()) { if (leaderElection(true).get() && leaderElection(false).get()) { // elected as leader @@ -1285,6 +1325,7 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, << ", isPreVote = " << req.get_is_pre_vote(); std::lock_guard g(raftLock_); + resp.current_term_ref() = term_; // Make sure the partition is running if (UNLIKELY(status_ == Status::STOPPED)) { @@ -1332,10 +1373,18 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req, auto oldRole = role_; auto oldTerm = term_; if (!req.get_is_pre_vote() && req.get_term() > term_) { - // req.get_term() > term_, we won't update term in prevote + // req.get_term() > term_ in formal election, update term and convert to follower term_ = req.get_term(); role_ = Role::FOLLOWER; leader_ = HostAddr("", 0); + resp.current_term_ref() = term_; + } else if (req.get_is_pre_vote() && req.get_term() - 1 > term_) { + // req.get_term() - 1 > term_ in prevote, update term and convert to follower. 
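+    // (the candidate campaigns with term_ + 1 during prevote and only bumps
+    // its own term once the prevote succeeds, see prepareElectionRequest)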
+    // we need to subtract 1 because the candidate's actual term is req.term() - 1
+    term_ = req.get_term() - 1;
+    role_ = Role::FOLLOWER;
+    leader_ = HostAddr("", 0);
+    resp.current_term_ref() = term_;
   }

   // Check the last term to receive a log
@@ -1387,10 +1436,16 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req,

   // not a pre-vote, need to rollback wal if necessary
   // role_ and term_ has been set above
-  if (oldRole == Role::LEADER && wal_->lastLogId() > lastLogId_) {
-    LOG(INFO) << idStr_ << "There are some logs up to " << wal_->lastLogId()
-              << " i did not commit when i was leader, rollback to " << lastLogId_;
-    wal_->rollbackToLog(lastLogId_);
+  if (oldRole == Role::LEADER) {
+    if (wal_->lastLogId() > lastLogId_) {
+      LOG(INFO) << idStr_ << "There are some logs up to " << wal_->lastLogId()
+                << " update lastLogId_ " << lastLogId_ << " to wal's";
+      lastLogId_ = wal_->lastLogId();
+      lastLogTerm_ = wal_->lastLogTerm();
+    }
+    for (auto& host : hosts_) {
+      host->pause();
+    }
   }
   if (oldRole == Role::LEADER) {
     bgWorkers_->addTask([self = shared_from_this(), oldTerm] { self->onLostLeadership(oldTerm); });
@@ -1402,6 +1457,7 @@ void RaftPart::processAskForVoteRequest(const cpp2::AskForVoteRequest& req,
   votedAddr_ = candidate;
   votedTerm_ = req.get_term();
   resp.error_code_ref() = cpp2::ErrorCode::SUCCEEDED;
+  resp.current_term_ref() = term_;

   // Reset the last message time
   lastMsgRecvDur_.reset();
@@ -1418,12 +1474,10 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
             << ", leaderIp = " << req.get_leader_addr()
             << ", leaderPort = " << req.get_leader_port()
             << ", current_term = " << req.get_current_term()
-            << ", lastLogId = " << req.get_last_log_id()
             << ", committedLogId = " << req.get_committed_log_id()
             << ", lastLogIdSent = " << req.get_last_log_id_sent()
             << ", lastLogTermSent = " << req.get_last_log_term_sent()
             << ", num_logs = " << req.get_log_str_list().size()
-            << ", logTerm = " << req.get_log_term()
             << ", local lastLogId = " << lastLogId_
             << ", local lastLogTerm = " << lastLogTerm_
             << ", local committedLogId = " << committedLogId_
@@ -1435,8 +1489,9 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
   resp.leader_addr_ref() = leader_.host;
   resp.leader_port_ref() = leader_.port;
   resp.committed_log_id_ref() = committedLogId_;
-  resp.last_log_id_ref() = lastLogId_ < committedLogId_ ? committedLogId_ : lastLogId_;
-  resp.last_log_term_ref() = lastLogTerm_;
+  // by default we ask the leader to send logs after committedLogId_
+  resp.last_matched_log_id_ref() = committedLogId_;
+  resp.last_matched_log_term_ref() = committedLogTerm_;

   // Check status
   if (UNLIKELY(status_ == Status::STOPPED)) {
@@ -1456,6 +1511,8 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
   }
   // Check leadership
   cpp2::ErrorCode err = verifyLeader(req);
+  // Set term_ again because it may be modified in verifyLeader
+  resp.current_term_ref() = term_;
   if (err != cpp2::ErrorCode::SUCCEEDED) {
     // Wrong leadership
     VLOG(2) << idStr_ << "Will not follow the leader";
@@ -1466,134 +1523,95 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
   // Reset the timeout timer
   lastMsgRecvDur_.reset();

-  if (req.get_last_log_id_sent() < committedLogId_ && req.get_last_log_term_sent() <= term_) {
-    LOG(INFO) << idStr_ << "Stale log! The log " << req.get_last_log_id_sent() << ", term "
-              << req.get_last_log_term_sent() << " i had committed yet. My committedLogId is "
-              << committedLogId_ << ", term is " << term_;
-    resp.error_code_ref() = cpp2::ErrorCode::E_LOG_STALE;
-    return;
-  } else if (req.get_last_log_id_sent() < committedLogId_) {
-    LOG(INFO) << idStr_ << "What?? How it happens! The log id is " << req.get_last_log_id_sent()
-              << ", the log term is " << req.get_last_log_term_sent()
-              << ", but my committedLogId is " << committedLogId_ << ", my term is " << term_
-              << ", to make the cluster stable i will follow the high term"
-              << " candidate and cleanup my data";
-    reset();
-    resp.committed_log_id_ref() = committedLogId_;
-    resp.last_log_id_ref() = lastLogId_;
-    resp.last_log_term_ref() = lastLogTerm_;
-    return;
-  }
-
-  // req.get_last_log_id_sent() >= committedLogId_
-  if (req.get_last_log_id_sent() == lastLogId_ && req.get_last_log_term_sent() == lastLogTerm_) {
-    // nothing to do
-    // just append log later
-  } else if (req.get_last_log_id_sent() > lastLogId_) {
-    // There is a gap
-    LOG(INFO) << idStr_ << "Local is missing logs from id " << lastLogId_ << ". Need to catch up";
-    resp.error_code_ref() = cpp2::ErrorCode::E_LOG_GAP;
-    return;
-  } else {
-    // check the last log term is matched or not
-    int reqLastLogTerm = wal_->getLogTerm(req.get_last_log_id_sent());
-    if (req.get_last_log_term_sent() != reqLastLogTerm) {
-      LOG(INFO) << idStr_ << "The local log term is " << reqLastLogTerm
-                << ", which is different from the leader's prevLogTerm "
-                << req.get_last_log_term_sent() << ", the prevLogId is "
-                << req.get_last_log_id_sent() << ". So ask leader to send logs from committedLogId "
-                << committedLogId_;
-      TermID committedLogTerm = wal_->getLogTerm(committedLogId_);
-      if (committedLogTerm > 0) {
-        resp.last_log_id_ref() = committedLogId_;
-        resp.last_log_term_ref() = committedLogTerm;
+  // `lastMatchedLogId` is the last log id of which leader's and follower's log are matched
+  // (which means log term of same log id are the same)
+  // The relationships are as follows:
+  // myself.committedLogId_ <= lastMatchedLogId <= lastLogId_
+  LogID lastMatchedLogId = committedLogId_;
+  do {
+    size_t diffIndex = 0;
+    size_t numLogs = req.get_log_str_list().size();
+    LogID firstId = req.get_last_log_id_sent() + 1;
+    LogID lastId = req.get_last_log_id_sent() + numLogs;
+    if (req.get_last_log_id_sent() == lastLogId_ && req.get_last_log_term_sent() == lastLogTerm_) {
+      // happy path: logs are matched, just append log
+    } else {
+      // We ask the leader to send logs from committedLogId_ if one of the following occurs:
+      // 1. Some of the log entries in the current request have already been committed
+      // 2. I don't have the log of req.last_log_id_sent
+      // 3. My log term on req.last_log_id_sent is not the same as req.last_log_term_sent
+      // todo(doodle): One of the most common cases when req.get_last_log_id_sent() <
+      // committedLogId_ is that the leader timed out and retried with the same request, but the
+      // follower had in fact already received it. There are two choices: ask the leader to send
+      // logs after committedLogId_, or just do nothing.
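+      // For example: this follower has committed up to log 15 and a retried
+      // request carries last_log_id_sent = 10. Condition 1 below fires, the
+      // follower replies E_LOG_GAP with last_matched_log_id = 15, and the
+      // leader resumes sending from log 16.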
+ if (req.get_last_log_id_sent() < committedLogId_ || + wal_->lastLogId() < req.get_last_log_id_sent() || + wal_->getLogTerm(req.get_last_log_id_sent()) != req.get_last_log_term_sent()) { + resp.last_matched_log_id_ref() = committedLogId_; + resp.last_matched_log_term() = committedLogTerm_; + resp.error_code() = cpp2::ErrorCode::E_LOG_GAP; + // lastMatchedLogId is committedLogId_ + return; } - resp.error_code_ref() = cpp2::ErrorCode::E_LOG_GAP; - return; - } - } - - // request get_last_log_term_sent == wal[get_last_log_id_sent].log_term - size_t numLogs = req.get_log_str_list().size(); - LogID firstId = req.get_last_log_id_sent() + 1; - size_t diffIndex = 0; - do { - // find the first id/term not match, rollback until it, and append the remaining wal - if (!(req.get_last_log_id_sent() == lastLogId_ && - req.get_last_log_term_sent() == lastLogTerm_)) { - // check the diff index in log, find the first log which term is not same as term in request - { - std::unique_ptr it = wal_->iterator(firstId, firstId + numLogs - 1); - for (size_t i = 0; i < numLogs && it->valid(); i++, ++(*it), diffIndex++) { - int logTerm = it->logTerm(); - if (req.get_log_term() != logTerm) { - break; - } + // wal_->logTerm(req.get_last_log_id_sent()) == req.get_last_log_term() + // Try to find the diff point by comparing each log entry's term of same id between local wal + // and log entry in request + TermID lastTerm = (numLogs == 0) ? req.get_last_log_term_sent() + : req.get_log_str_list().back().get_log_term(); + auto localWalIt = wal_->iterator(firstId, lastId); + for (size_t i = 0; i < numLogs && localWalIt->valid(); ++i, ++(*localWalIt), ++diffIndex) { + if (localWalIt->logTerm() != req.get_log_str_list()[i].get_log_term()) { + break; } } - - // stale log if (diffIndex == numLogs) { - // All logs have been received before - resp.last_log_id_ref() = firstId + numLogs - 1; - resp.last_log_term_ref() = req.get_log_term(); - // nothing to append, goto commit + // all logs are the same, ask leader to send logs after lastId + lastMatchedLogId = lastId; + resp.last_matched_log_id_ref() = lastId; + resp.last_matched_log_term_ref() = lastTerm; break; } - // rollback the wal - if (wal_->rollbackToLog(firstId + diffIndex - 1)) { - lastLogId_ = wal_->lastLogId(); - lastLogTerm_ = wal_->lastLogTerm(); - LOG(INFO) << idStr_ << "Rollback succeeded! lastLogId is " << lastLogId_ - << ", logLogTerm is " << lastLogTerm_ << ", committedLogId is " << committedLogId_ - << ", logs in request " << numLogs << ", remaining logs after rollback " - << numLogs - diffIndex; - } else { - LOG(ERROR) << idStr_ << "Rollback fail! 
lastLogId is" << lastLogId_ << ", logLogTerm is " - << lastLogTerm_ << ", committedLogId is " << committedLogId_ - << ", rollback id is " << firstId + diffIndex - 1; - resp.error_code_ref() = cpp2::ErrorCode::E_WAL_FAIL; - return; - } - - // update msg + // Found a difference at log of (firstId + diffIndex), all logs from (firstId + diffIndex) + // could be truncated + wal_->rollbackToLog(firstId + diffIndex - 1); firstId = firstId + diffIndex; numLogs = numLogs - diffIndex; } - // Append new logs - std::vector logEntries = std::vector( + // happy path or a difference is found: append remaing logs + auto logEntries = std::vector( std::make_move_iterator(req.get_log_str_list().begin() + diffIndex), std::make_move_iterator(req.get_log_str_list().end())); - LogStrListIterator iter(firstId, req.get_log_term(), std::move(logEntries)); - if (wal_->appendLogs(iter)) { - if (numLogs != 0) { - CHECK_EQ(firstId + numLogs - 1, wal_->lastLogId()) << "First Id is " << firstId; - } + RaftLogIterator logIter(firstId, std::move(logEntries)); + if (wal_->appendLogs(logIter)) { + CHECK_EQ(lastId, wal_->lastLogId()); lastLogId_ = wal_->lastLogId(); lastLogTerm_ = wal_->lastLogTerm(); - resp.last_log_id_ref() = lastLogId_; - resp.last_log_term_ref() = lastLogTerm_; + lastMatchedLogId = lastLogId_; + resp.last_matched_log_id_ref() = lastLogId_; + resp.last_matched_log_term_ref() = lastLogTerm_; } else { resp.error_code_ref() = cpp2::ErrorCode::E_WAL_FAIL; return; } } while (false); - LogID lastLogIdCanCommit = std::min(lastLogId_, req.get_committed_log_id()); + // If follower found a point where log matches leader's log (lastMatchedLogId), if leader's + // committed_log_id is greater than lastMatchedLogId, we can commit logs before lastMatchedLogId + LogID lastLogIdCanCommit = std::min(lastMatchedLogId, req.get_committed_log_id()); + CHECK_LE(lastLogIdCanCommit, wal_->lastLogId()); if (lastLogIdCanCommit > committedLogId_) { - // Commit some logs - // We can only commit logs from firstId to min(lastLogId_, leader's commit - // log id), follower can't always commit to leader's commit id because of - // lack of log - auto code = commitLogs(wal_->iterator(committedLogId_ + 1, lastLogIdCanCommit), false); + auto walIt = wal_->iterator(committedLogId_ + 1, lastLogIdCanCommit); + auto [code, lastCommitId, lastCommitTerm] = commitLogs(std::move(walIt), false); if (code == nebula::cpp2::ErrorCode::SUCCEEDED) { stats::StatsManager::addValue(kCommitLogLatencyUs, execTime_); VLOG(1) << idStr_ << "Follower succeeded committing log " << committedLogId_ + 1 << " to " << lastLogIdCanCommit; - committedLogId_ = lastLogIdCanCommit; + CHECK_EQ(lastLogIdCanCommit, lastCommitId); + committedLogId_ = lastCommitId; + committedLogTerm_ = lastCommitTerm; resp.committed_log_id_ref() = lastLogIdCanCommit; resp.error_code_ref() = cpp2::ErrorCode::SUCCEEDED; } else if (code == nebula::cpp2::ErrorCode::E_WRITE_STALLED) { @@ -1607,6 +1625,7 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req, } else { LOG(ERROR) << idStr_ << "Failed to commit log " << committedLogId_ + 1 << " to " << req.get_committed_log_id(); + resp.committed_log_id_ref() = committedLogId_; resp.error_code_ref() = cpp2::ErrorCode::E_WAL_FAIL; } } else { @@ -1637,21 +1656,18 @@ cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { // found new leader with higher term } else { // req.get_current_term() == term_ - do { - if (UNLIKELY(role_ == Role::LEADER)) { + if (UNLIKELY(leader_ == HostAddr("", 0))) { + // I don't know who is the leader, will 
accept it as new leader + } else { + // I know who is leader + if (LIKELY(leader_ == peer)) { + // Same leader + return cpp2::ErrorCode::SUCCEEDED; + } else { LOG(ERROR) << idStr_ << "Split brain happens, will follow the new leader " << peer << " on term " << req.get_current_term(); - break; - } else { - if (LIKELY(leader_ == peer)) { - // Same leader - return cpp2::ErrorCode::SUCCEEDED; - } else if (UNLIKELY(leader_ == HostAddr("", 0))) { - // I don't know who is the leader, will accept it as new leader - break; - } } - } while (false); + } } // Update my state. @@ -1668,10 +1684,16 @@ cpp2::ErrorCode RaftPart::verifyLeader(const REQ& req) { term_ = req.get_current_term(); isBlindFollower_ = false; // Before accept the logs from the new leader, check the logs locally. - if (wal_->lastLogId() > lastLogId_) { - LOG(INFO) << idStr_ << "There is one log " << wal_->lastLogId() - << " i did not commit when i was leader, rollback to " << lastLogId_; - wal_->rollbackToLog(lastLogId_); + if (oldRole == Role::LEADER) { + if (wal_->lastLogId() > lastLogId_) { + LOG(INFO) << idStr_ << "There are some logs up to " << wal_->lastLogId() + << " update lastLogId_ " << lastLogId_ << " to wal's"; + lastLogId_ = wal_->lastLogId(); + lastLogTerm_ = wal_->lastLogTerm(); + } + for (auto& host : hosts_) { + host->pause(); + } } if (oldRole == Role::LEADER) { // Need to invoke onLostLeadership callback @@ -1689,7 +1711,6 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, << ", leaderIp = " << req.get_leader_addr() << ", leaderPort = " << req.get_leader_port() << ", current_term = " << req.get_current_term() - << ", lastLogId = " << req.get_last_log_id() << ", committedLogId = " << req.get_committed_log_id() << ", lastLogIdSent = " << req.get_last_log_id_sent() << ", lastLogTermSent = " << req.get_last_log_term_sent() @@ -1699,11 +1720,16 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, << ", local current term = " << term_; std::lock_guard g(raftLock_); + // As for heartbeat, last_log_id and last_log_term is not checked by leader, follower only verify + // whether leader is legal, just return lastLogId_ and lastLogTerm_ in resp. And we don't do any + // log appending. + // Leader will check the current_term in resp, if req.term < term_ (follower will reject req in + // verifyLeader), leader will update term and step down as follower. resp.current_term_ref() = term_; resp.leader_addr_ref() = leader_.host; resp.leader_port_ref() = leader_.port; resp.committed_log_id_ref() = committedLogId_; - resp.last_log_id_ref() = lastLogId_ < committedLogId_ ? 
committedLogId_ : lastLogId_; + resp.last_log_id_ref() = lastLogId_; resp.last_log_term_ref() = lastLogTerm_; // Check status @@ -1719,6 +1745,8 @@ void RaftPart::processHeartbeatRequest(const cpp2::HeartbeatRequest& req, } // Check leadership cpp2::ErrorCode err = verifyLeader(req); + // Set term_ again because it may be modified in verifyLeader + resp.current_term_ref() = term_; if (err != cpp2::ErrorCode::SUCCEEDED) { // Wrong leadership VLOG(2) << idStr_ << "Will not follow the leader"; @@ -1784,15 +1812,17 @@ void RaftPart::processSendSnapshotRequest(const cpp2::SendSnapshotRequest& req, } if (req.get_done()) { committedLogId_ = req.get_committed_log_id(); + committedLogTerm_ = req.get_committed_log_term(); lastLogId_ = committedLogId_; - lastLogTerm_ = req.get_committed_log_term(); + lastLogTerm_ = committedLogTerm_; term_ = lastLogTerm_; // there should be no wal after state converts to WAITING_SNAPSHOT, the RaftPart has been reset DCHECK_EQ(wal_->firstLogId(), 0); DCHECK_EQ(wal_->lastLogId(), 0); status_ = Status::RUNNING; LOG(INFO) << idStr_ << "Receive all snapshot, committedLogId_ " << committedLogId_ - << ", lastLodId " << lastLogId_ << ", lastLogTermId " << lastLogTerm_; + << ", committedLogTerm_ " << committedLogTerm_ << ", lastLodId " << lastLogId_ + << ", lastLogTermId " << lastLogTerm_; } resp.error_code_ref() = cpp2::ErrorCode::SUCCEEDED; return; @@ -1811,7 +1841,6 @@ void RaftPart::sendHeartbeat() { using namespace folly; // NOLINT since the fancy overload of | operator VLOG(2) << idStr_ << "Send heartbeat"; TermID currTerm = 0; - LogID latestLogId = 0; LogID commitLogId = 0; TermID prevLogTerm = 0; LogID prevLogId = 0; @@ -1820,7 +1849,6 @@ void RaftPart::sendHeartbeat() { { std::lock_guard g(raftLock_); currTerm = term_; - latestLogId = wal_->lastLogId(); commitLogId = committedLogId_; prevLogTerm = lastLogTerm_; prevLogId = lastLogId_; @@ -1829,37 +1857,42 @@ void RaftPart::sendHeartbeat() { } auto eb = ioThreadPool_->getEventBase(); auto startMs = time::WallClock::fastNowInMilliSec(); - collectNSucceeded(gen::from(hosts) | - gen::map([self = shared_from_this(), - eb, - currTerm, - latestLogId, - commitLogId, - prevLogId, - prevLogTerm](std::shared_ptr hostPtr) { - VLOG(2) << self->idStr_ << "Send heartbeat to " << hostPtr->idStr(); - return via(eb, [=]() -> Future { - return hostPtr->sendHeartbeat( - eb, currTerm, latestLogId, commitLogId, prevLogTerm, prevLogId); - }); - }) | - gen::as(), - // Number of succeeded required - hosts.size(), - // Result evaluator - [hosts](size_t index, cpp2::HeartbeatResponse& resp) { - return resp.get_error_code() == cpp2::ErrorCode::SUCCEEDED && - !hosts[index]->isLearner(); - }) - .then([replica, hosts = std::move(hosts), startMs, this]( + collectNSucceeded( + gen::from(hosts) | + gen::map([self = shared_from_this(), eb, currTerm, commitLogId, prevLogId, prevLogTerm]( + std::shared_ptr hostPtr) { + VLOG(2) << self->idStr_ << "Send heartbeat to " << hostPtr->idStr(); + return via(eb, [=]() -> Future { + return hostPtr->sendHeartbeat(eb, currTerm, commitLogId, prevLogTerm, prevLogId); + }); + }) | + gen::as(), + // Number of succeeded required + hosts.size(), + // Result evaluator + [hosts](size_t index, cpp2::HeartbeatResponse& resp) { + return resp.get_error_code() == cpp2::ErrorCode::SUCCEEDED && !hosts[index]->isLearner(); + }) + .then([replica, hosts = std::move(hosts), startMs, currTerm, this]( folly::Try&& resps) { CHECK(!resps.hasException()); size_t numSucceeded = 0; + TermID highestTerm = currTerm; for (auto& resp : *resps) 
{ if (!hosts[resp.first]->isLearner() && resp.second.get_error_code() == cpp2::ErrorCode::SUCCEEDED) { ++numSucceeded; } + highestTerm = std::max(highestTerm, resp.second.get_current_term()); + } + { + std::lock_guard g(raftLock_); + if (highestTerm > term_) { + term_ = highestTerm; + role_ = Role::FOLLOWER; + leader_ = HostAddr("", 0); + return; + } } if (numSucceeded >= replica) { VLOG(2) << idStr_ << "Heartbeat is accepted by quorum"; @@ -1921,7 +1954,7 @@ void RaftPart::reset() { wal_->reset(); cleanup(); lastLogId_ = committedLogId_ = 0; - lastLogTerm_ = 0; + lastLogTerm_ = committedLogTerm_ = 0; lastTotalCount_ = 0; lastTotalSize_ = 0; } diff --git a/src/kvstore/raftex/RaftPart.h b/src/kvstore/raftex/RaftPart.h index b1604cafc70..b5f99de4219 100644 --- a/src/kvstore/raftex/RaftPart.h +++ b/src/kvstore/raftex/RaftPart.h @@ -286,9 +286,12 @@ class RaftPart : public std::enable_shared_from_this { // Check if we can accept candidate's message virtual cpp2::ErrorCode checkPeer(const HostAddr& candidate); - // The inherited classes need to implement this method to commit - // a batch of log messages - virtual nebula::cpp2::ErrorCode commitLogs(std::unique_ptr iter, bool wait) = 0; + // The inherited classes need to implement this method to commit a batch of log messages. + // Return {error code, last commit log id, last commit log term}. + // When no logs applied to state machine or error occurs when calling commitLogs, + // kNoCommitLogId and kNoCommitLogTerm are returned. + virtual std::tuple commitLogs( + std::unique_ptr iter, bool wait) = 0; virtual bool preProcessLog(LogID logId, TermID termId, @@ -302,7 +305,7 @@ class RaftPart : public std::enable_shared_from_this { bool finished) = 0; // Clean up extra data about the part, usually related to state machine - virtual void cleanup() = 0; + virtual nebula::cpp2::ErrorCode cleanup() = 0; void addPeer(const HostAddr& peer); @@ -533,15 +536,21 @@ class RaftPart : public std::enable_shared_from_this { // To prevent we have voted more than once in a same term TermID votedTerm_{0}; - // The id and term of the last-sent log + // As for leader lastLogId_ is the log id which has been replicated to majority peers. + // As for follower lastLogId_ is only a latest log id from current leader or any leader of + // previous term. Not all logs before lastLogId_ could be applied for follower. 
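An aside on the heartbeat handling refactored above: the leader now collects the current_term of every response and steps down the moment any peer reports a newer term. A minimal, self-contained sketch of that rule follows; MiniRaft, Role and HeartbeatReply are illustrative stand-ins, not the real RaftPart types.

  #include <algorithm>
  #include <cstdint>
  #include <mutex>
  #include <vector>

  enum class Role { LEADER, FOLLOWER };
  struct HeartbeatReply {
    int64_t currentTerm;
  };

  struct MiniRaft {
    std::mutex lock;
    int64_t term{0};
    Role role{Role::LEADER};

    // Called with the replies of one heartbeat round, mirroring the
    // highestTerm check in the sendHeartbeat() continuation above.
    void onHeartbeatReplies(const std::vector<HeartbeatReply>& replies) {
      // Find the newest term any peer reported in this round.
      int64_t highest = 0;
      for (const auto& r : replies) {
        highest = std::max(highest, r.currentTerm);
      }
      // A peer in a later term means this node's leadership is stale: adopt
      // the term and fall back to follower. The real code also resets
      // leader_ so the next election can establish the new leader.
      std::lock_guard<std::mutex> g(lock);
      if (highest > term) {
        term = highest;
        role = Role::FOLLOWER;
      }
    }
  };

Scanning the replies before taking the lock keeps the critical section down to a single compare-and-update, the same shape as the refactored continuation above.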
LogID lastLogId_{0}; TermID lastLogTerm_{0}; - // The id for the last globally committed log (from the leader) + + // The last id and term when logs has been applied to state machine LogID committedLogId_{0}; + TermID committedLogTerm_{0}; + static constexpr LogID kNoCommitLogId{-1}; + static constexpr TermID kNoCommitLogTerm{-1}; // To record how long ago when the last leader message received time::Duration lastMsgRecvDur_; - // To record how long ago when the last log message or heartbeat was sent + // To record how long ago when the last log message was sent time::Duration lastMsgSentDur_; // To record when the last message was accepted by majority peers uint64_t lastMsgAcceptedTime_{0}; diff --git a/src/kvstore/raftex/test/TestShard.cpp b/src/kvstore/raftex/test/TestShard.cpp index 66d419ad915..ef3e58a33a0 100644 --- a/src/kvstore/raftex/test/TestShard.cpp +++ b/src/kvstore/raftex/test/TestShard.cpp @@ -165,15 +165,14 @@ void TestShard::onLeaderReady(TermID term) { UNUSED(term); } -nebula::cpp2::ErrorCode TestShard::commitLogs(std::unique_ptr iter, bool) { - LogID firstId = -1; - LogID lastId = -1; +std::tuple TestShard::commitLogs( + std::unique_ptr iter, bool) { + LogID lastId = kNoCommitLogId; + TermID lastTerm = kNoCommitLogTerm; int32_t commitLogsNum = 0; while (iter->valid()) { - if (firstId < 0) { - firstId = iter->logId(); - } lastId = iter->logId(); + lastTerm = iter->logTerm(); auto log = iter->logMsg(); if (!log.empty()) { switch (static_cast(log[0])) { @@ -204,14 +203,15 @@ nebula::cpp2::ErrorCode TestShard::commitLogs(std::unique_ptr iter, } ++(*iter); } - VLOG(2) << "TestShard: " << idStr_ << "Committed log " << firstId << " to " << lastId; + VLOG(2) << "TestShard: " << idStr_ << "Committed log " + << " up to " << lastId; if (lastId > -1) { lastCommittedLogId_ = lastId; } if (commitLogsNum > 0) { commitTimes_++; } - return nebula::cpp2::ErrorCode::SUCCEEDED; + return {nebula::cpp2::ErrorCode::SUCCEEDED, lastId, lastTerm}; } std::pair TestShard::commitSnapshot(const std::vector& data, @@ -236,10 +236,11 @@ std::pair TestShard::commitSnapshot(const std::vector iter, bool wait) override; + std::tuple commitLogs(std::unique_ptr iter, + bool wait) override; bool preProcessLog(LogID, TermID, ClusterID, const std::string& log) override { if (!log.empty()) { @@ -120,7 +121,7 @@ class TestShard : public RaftPart { TermID committedLogTerm, bool finished) override; - void cleanup() override; + nebula::cpp2::ErrorCode cleanup() override; size_t getNumLogs() const; bool getLogMsg(size_t index, folly::StringPiece& msg); diff --git a/src/kvstore/test/NebulaListenerTest.cpp b/src/kvstore/test/NebulaListenerTest.cpp index 8d46591260e..94831bbbb08 100644 --- a/src/kvstore/test/NebulaListenerTest.cpp +++ b/src/kvstore/test/NebulaListenerTest.cpp @@ -107,11 +107,12 @@ class DummyListener : public Listener { return lastApplyLogId_; } - void cleanup() override { + nebula::cpp2::ErrorCode cleanup() override { data_.clear(); leaderCommitId_ = 0; lastApplyLogId_ = 0; snapshotBatchCount_ = 0; + return nebula::cpp2::ErrorCode::SUCCEEDED; } private: diff --git a/src/kvstore/wal/AtomicLogBuffer.h b/src/kvstore/wal/AtomicLogBuffer.h index 98fe8ee3ceb..9d3d973ffd1 100644 --- a/src/kvstore/wal/AtomicLogBuffer.h +++ b/src/kvstore/wal/AtomicLogBuffer.h @@ -54,10 +54,12 @@ struct Node { } Record* rec(int32_t index) { + if (UNLIKELY(index >= kMaxLength)) { + return nullptr; + } CHECK_GE(index, 0); auto pos = pos_.load(std::memory_order_acquire); CHECK_LE(index, pos); - CHECK(index != kMaxLength); return 
&(*records_)[index]; } @@ -176,8 +178,11 @@ class AtomicLogBuffer : public std::enable_shared_from_this { currNode_ = logBuffer_->seek(logId); if (currNode_ != nullptr) { currIndex_ = logId - currNode_->firstLogId_; + // Since reader is only a snapshot, a possible case is that logId > currNode->firstLogId_, + // however, the logId we search may not in currNode. (e.g. currNode_ is the latest node, + // but currIndex_ >= kMaxLength). In this case, currRec_ will be an invalid one. currRec_ = currNode_->rec(currIndex_); - valid_ = true; + valid_ = (currRec_ != nullptr); } else { valid_ = false; } diff --git a/src/kvstore/wal/WalFileIterator.cpp b/src/kvstore/wal/WalFileIterator.cpp index 98021a1b452..c390dbb5e75 100644 --- a/src/kvstore/wal/WalFileIterator.cpp +++ b/src/kvstore/wal/WalFileIterator.cpp @@ -13,13 +13,7 @@ namespace nebula { namespace wal { WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startId, LogID lastId) - : wal_(wal), currId_(startId) { - if (lastId >= 0 && lastId <= wal_->lastLogId()) { - lastId_ = lastId; - } else { - lastId_ = wal_->lastLogId(); - } - + : wal_(wal), lastId_(lastId), currId_(startId) { if (currId_ > lastId_) { LOG(ERROR) << wal_->idStr_ << "The log " << currId_ << " is out of range, the lastLogId is " << lastId_; @@ -60,11 +54,11 @@ WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startI } nextFirstId_ = getFirstIdInNextFile(); + // log in range [startId, lastId] is located in last wal, however, the wal is rollbacked during + // building the iterator if (currId_ > idRanges_.front().second) { - LOG(FATAL) << wal_->idStr_ << "currId_ " << currId_ << ", idRanges.front firstLogId " - << idRanges_.front().first << ", idRanges.front lastLogId " - << idRanges_.front().second << ", idRanges size " << idRanges_.size() << ", lastId_ " - << lastId_ << ", nextFirstId_ " << nextFirstId_; + currId_ = lastId_ + 1; + return; } if (!idRanges_.empty()) { @@ -74,18 +68,27 @@ WalFileIterator::WalFileIterator(std::shared_ptr wal, LogID startI LogID logId; // Read the logID int fd = fds_.front(); - CHECK_EQ(pread(fd, reinterpret_cast(&logId), sizeof(LogID), currPos_), - static_cast(sizeof(LogID))); + if (pread(fd, reinterpret_cast(&logId), sizeof(LogID), currPos_) != + static_cast(sizeof(LogID))) { + eof_ = true; + break; + } // Read the termID - CHECK_EQ( - pread(fd, reinterpret_cast(&currTerm_), sizeof(TermID), currPos_ + sizeof(LogID)), - static_cast(sizeof(TermID))); + if (pread( + fd, reinterpret_cast(&currTerm_), sizeof(TermID), currPos_ + sizeof(LogID)) != + static_cast(sizeof(TermID))) { + eof_ = true; + break; + } // Read the log length - CHECK_EQ(pread(fd, - reinterpret_cast(&currMsgLen_), - sizeof(int32_t), - currPos_ + sizeof(LogID) + sizeof(TermID)), - static_cast(sizeof(int32_t))); + if (pread(fd, + reinterpret_cast(&currMsgLen_), + sizeof(int32_t), + currPos_ + sizeof(LogID) + sizeof(TermID)) != + static_cast(sizeof(int32_t))) { + eof_ = true; + break; + } if (logId == currId_) { break; } @@ -135,28 +138,41 @@ LogIterator& WalFileIterator::operator++() { } else { LogID logId; int fd = fds_.front(); - // Read the logID - CHECK_EQ(pread(fd, reinterpret_cast(&logId), sizeof(LogID), currPos_), - static_cast(sizeof(LogID))) - << "currPos = " << currPos_; - CHECK_EQ(currId_, logId); - // Read the termID - CHECK_EQ( - pread(fd, reinterpret_cast(&currTerm_), sizeof(TermID), currPos_ + sizeof(LogID)), - static_cast(sizeof(TermID))); - // Read the log length - CHECK_EQ(pread(fd, - reinterpret_cast(&currMsgLen_), - sizeof(int32_t), - currPos_ + 
sizeof(TermID) + sizeof(LogID)), - static_cast(sizeof(int32_t))); + do { + // Read the logID + if (pread(fd, reinterpret_cast(&logId), sizeof(LogID), currPos_) != + static_cast(sizeof(LogID))) { + LOG(WARNING) << "Failed to read logId currPos = " << currPos_; + eof_ = true; + break; + } + CHECK_EQ(currId_, logId); + // Read the termID + if (pread( + fd, reinterpret_cast(&currTerm_), sizeof(TermID), currPos_ + sizeof(LogID)) != + static_cast(sizeof(TermID))) { + LOG(WARNING) << "Failed to read term currPos = " << currPos_; + eof_ = true; + break; + } + // Read the log length + if (pread(fd, + reinterpret_cast(&currMsgLen_), + sizeof(int32_t), + currPos_ + sizeof(TermID) + sizeof(LogID)) != + static_cast(sizeof(int32_t))) { + LOG(WARNING) << "Failed to read log length currPos = " << currPos_; + eof_ = true; + break; + } + } while (false); } return *this; } bool WalFileIterator::valid() const { - return currId_ <= lastId_; + return !eof_ && currId_ <= lastId_; } LogID WalFileIterator::logId() const { diff --git a/src/kvstore/wal/WalFileIterator.h b/src/kvstore/wal/WalFileIterator.h index 9e5a3919cc6..2fb032bc53d 100644 --- a/src/kvstore/wal/WalFileIterator.h +++ b/src/kvstore/wal/WalFileIterator.h @@ -45,6 +45,8 @@ class WalFileIterator final : public LogIterator { LogID currId_; TermID currTerm_; + // When there are more wals, nextFirstId_ is the firstLogId in next wal. + // When there are not more wals, nextFirstId_ is the current wal's lastLogId + 1 LogID nextFirstId_; // [firstId, lastId] @@ -52,6 +54,8 @@ class WalFileIterator final : public LogIterator { std::list fds_; int64_t currPos_{0}; int32_t currMsgLen_{0}; + // Whether we have encounter end of wal file during building iterator or iterating + bool eof_{false}; mutable std::string currLog_; }; From 4d05fb0a20ea3126c3ea729ab958b88114722730 Mon Sep 17 00:00:00 2001 From: Alex Xing <90179377+SuperYoko@users.noreply.github.com> Date: Mon, 27 Dec 2021 19:02:57 +0800 Subject: [PATCH 4/8] Add standalone version for nebula (#3310) * Start standalone version(graph+storage+meta) * add kv for compile/disable test use * fix obj compile * Modify thread usage * fix value set * use shared ptr to avoid lifetime * fix up * fix ret value type * Fix default config * Fix rebase Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- CMakeLists.txt | 4 + cmake/nebula/GeneralCMakeOptions.cmake | 1 + conf/CMakeLists.txt | 19 + conf/nebula-standalone.conf.default | 164 +++++++ scripts/CMakeLists.txt | 29 +- scripts/nebula-standalone.service | 6 + scripts/nebula.service | 12 +- src/clients/storage/GeneralStorageClient.h | 56 +++ src/clients/storage/InternalStorageClient.h | 10 +- src/clients/storage/StorageClient.cpp | 177 ++++--- src/clients/storage/StorageClient.h | 19 +- src/clients/storage/StorageClientBase-inl.h | 65 +-- src/clients/storage/StorageClientBase.h | 4 +- src/common/thrift/ThriftClientManager-inl.h | 2 +- src/common/thrift/ThriftLocalClientManager.h | 39 ++ src/daemons/CMakeLists.txt | 78 ++- src/daemons/MetaDaemon.cpp | 143 +----- src/daemons/MetaDaemonInit.cpp | 193 ++++++++ src/daemons/MetaDaemonInit.h | 22 + src/daemons/StandAloneDaemon.cpp | 444 ++++++++++++++++++ src/graph/service/GraphFlags.cpp | 12 +- src/kvstore/Listener.cpp | 2 +- .../plugins/elasticsearch/ESListener.cpp | 2 +- src/storage/CMakeLists.txt | 6 + src/storage/GraphStorageLocalServer.cpp | 150 ++++++ src/storage/GraphStorageLocalServer.h | 66 +++ src/storage/StorageServer.cpp | 30 +- src/storage/StorageServer.h | 5 + src/tools/CMakeLists.txt | 4 + 
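One note on the WalFileIterator changes in the raft patch above, before this patch's file list continues: short reads are now surfaced through an eof_ flag instead of crashing on CHECK_EQ. The sketch below parses one record header in the same layout the iterator reads (LogID, then TermID, then a 4-byte message length); WalHeader and readWalHeader are hypothetical names used only for illustration.

  #include <unistd.h>

  #include <cstdint>
  #include <optional>

  using LogID = int64_t;
  using TermID = int64_t;

  // Hypothetical view of one wal record header: logId | termId | msgLen.
  struct WalHeader {
    LogID logId;
    TermID term;
    int32_t msgLen;
  };

  // Read the header at `pos`. A short read (truncated wal, e.g. after a
  // crash or a concurrent rollback) yields std::nullopt, which corresponds
  // to setting eof_ = true in the iterator instead of aborting the process.
  std::optional<WalHeader> readWalHeader(int fd, off_t pos) {
    WalHeader h;
    if (pread(fd, &h.logId, sizeof(h.logId), pos) !=
        static_cast<ssize_t>(sizeof(h.logId))) {
      return std::nullopt;
    }
    if (pread(fd, &h.term, sizeof(h.term), pos + sizeof(LogID)) !=
        static_cast<ssize_t>(sizeof(h.term))) {
      return std::nullopt;
    }
    if (pread(fd, &h.msgLen, sizeof(h.msgLen),
              pos + sizeof(LogID) + sizeof(TermID)) !=
        static_cast<ssize_t>(sizeof(h.msgLen))) {
      return std::nullopt;
    }
    return h;
  }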
src/webservice/WebService.cpp | 10 +- src/webservice/WebService.h | 9 +- 31 files changed, 1491 insertions(+), 292 deletions(-) create mode 100644 conf/nebula-standalone.conf.default create mode 100644 scripts/nebula-standalone.service create mode 100644 src/clients/storage/GeneralStorageClient.h create mode 100644 src/common/thrift/ThriftLocalClientManager.h create mode 100644 src/daemons/MetaDaemonInit.cpp create mode 100644 src/daemons/MetaDaemonInit.h create mode 100644 src/daemons/StandAloneDaemon.cpp create mode 100644 src/storage/GraphStorageLocalServer.cpp create mode 100644 src/storage/GraphStorageLocalServer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d9dee102f0..2e6729faa47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,10 @@ option(ENABLE_INCLUDE_WHAT_YOU_USE "Enable include-what-you-use find nouse incl add_definitions(-DNEBULA_HOME=${CMAKE_SOURCE_DIR}) +if(ENABLE_STANDALONE_VERSION) + add_definitions(-DBUILD_STANDALONE) +endif() + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/nebula) diff --git a/cmake/nebula/GeneralCMakeOptions.cmake b/cmake/nebula/GeneralCMakeOptions.cmake index 8fc3fd7a821..55583470f4d 100644 --- a/cmake/nebula/GeneralCMakeOptions.cmake +++ b/cmake/nebula/GeneralCMakeOptions.cmake @@ -18,6 +18,7 @@ option(ENABLE_COMPRESSED_DEBUG_INFO "Compress debug info to reduce binary si option(ENABLE_CLANG_TIDY "Enable clang-tidy if present" OFF) option(ENABLE_GDB_SCRIPT_SECTION "Add .debug_gdb_scripts section" OFF) option(DISABLE_CXX11_ABI "Whether to disable cxx11 abi" OFF) +option(ENABLE_STANDALONE_VERSION "Enable standalone version build" OFF) get_cmake_property(variable_list VARIABLES) foreach(_varname ${variable_list}) diff --git a/conf/CMakeLists.txt b/conf/CMakeLists.txt index d75644e1fbf..0ec9079790c 100644 --- a/conf/CMakeLists.txt +++ b/conf/CMakeLists.txt @@ -5,6 +5,8 @@ # These configuration files are for reference to generate your own customized ones. # Thus, they are installed as read-only, even for the owner. +if(NOT ENABLE_STANDALONE_VERSION) + install( FILES nebula-graphd.conf.default @@ -47,3 +49,20 @@ install( COMPONENT storage ) + +else() + +install( + FILES + nebula-standalone.conf.default + PERMISSIONS + OWNER_READ + GROUP_READ + WORLD_READ + DESTINATION + etc + COMPONENT + graph + ) + +endif() diff --git a/conf/nebula-standalone.conf.default b/conf/nebula-standalone.conf.default new file mode 100644 index 00000000000..b356ef21691 --- /dev/null +++ b/conf/nebula-standalone.conf.default @@ -0,0 +1,164 @@ +########## basics ########## +# Whether to run as a daemon process +--daemonize=true +# The file to host the process id +--pid_file=pids/nebula-standalone.pid +# Whether to enable optimizer +--enable_optimizer=true +# The default charset when a space is created +--default_charset=utf8 +# The defaule collate when a space is created +--default_collate=utf8_bin +# Whether to use the configuration obtained from the configuration file +--local_config=true + +########## logging ########## +# The directory to host logging files +--log_dir=logs +# Log level, 0, 1, 2, 3 for INFO, WARNING, ERROR, FATAL respectively +--minloglevel=0 +# Verbose log level, 1, 2, 3, 4, the higher of the level, the more verbose of the logging +--v=0 +# Maximum seconds to buffer the log messages +--logbufsecs=0 +# Whether to redirect stdout and stderr to separate output files +--redirect_stdout=true +# Destination filename of stdout and stderr, which will also reside in log_dir. 
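Every `--name=value` line in this default file is consumed by gflags at daemon startup. Below is a minimal sketch of that mechanism, assuming only the gflags library; the flag demo_port is made up for illustration, while the real daemons define --port, --meta_port and --storage_port with the same macro.

  #include <gflags/gflags.h>

  #include <iostream>

  // Made-up flag for illustration; defaults live in code, and a config
  // file such as the one above only overrides them.
  DEFINE_int32(demo_port, 9669, "Port to listen on");

  int main(int argc, char* argv[]) {
    // Passing --flagfile=demo.conf makes gflags read `--name=value` lines
    // from the file, which is how this default config is loaded.
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    std::cout << "listening on " << FLAGS_demo_port << std::endl;
    return 0;
  }

Invoked as ./demo --flagfile=demo.conf, where demo.conf holds a line such as --demo_port=9700, the compiled-in default is replaced by the file's value.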
+--stdout_log_file=standalone-stdout.log +--stderr_log_file=standalone-stderr.log +# Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. +--stderrthreshold=2 + +########## query ########## +# Whether to treat partial success as an error. +# This flag is only used for Read-only access, and Modify access always treats partial success as an error. +--accept_partial_success=false +# Maximum sentence length, unit byte +--max_allowed_query_size=4194304 + +########## networking ########## +# Comma separated Meta Server Addresses +--meta_server_addrs=127.0.0.1:9559 +# Local IP used to identify the nebula-graphd process. +# Change it to an address other than loopback if the service is distributed or +# will be accessed remotely. +--local_ip=127.0.0.1 +# Network device to listen on +--listen_netdev=any +# Port to listen on +--port=9669 +--meta_port=9559 +--storage_port=9779 +# To turn on SO_REUSEPORT or not +--reuse_port=false +# Backlog of the listen socket, adjust this together with net.core.somaxconn +--listen_backlog=1024 +# The number of seconds Nebula service waits before closing the idle connections +--client_idle_timeout_secs=28800 +# The number of seconds before idle sessions expire +# The range should be in [1, 604800] +--session_idle_timeout_secs=28800 +# The number of threads to accept incoming connections +--num_accept_threads=1 +# The number of networking IO threads, 0 for # of CPU cores +--num_netio_threads=0 +# The number of threads to execute user queries, 0 for # of CPU cores +--num_worker_threads=0 +# HTTP service ip +--ws_ip=0.0.0.0 +# HTTP service port +--ws_http_port=19669 +# HTTP2 service port +--ws_h2_port=19670 +# storage client timeout +--storage_client_timeout_ms=60000 +# Port to listen on Meta with HTTP protocol, it corresponds to ws_http_port in metad's configuration file +--ws_meta_http_port=19559 +# HTTP service port +--ws_storage_http_port=19779 +# HTTP2 service port +--ws_storage_h2_port=19780 +# heartbeat with meta service +--heartbeat_interval_secs=10 + + +########## authentication ########## +# Enable authorization +--enable_authorize=false +# User login authentication type, password for nebula authentication, ldap for ldap authentication, cloud for cloud authentication +--auth_type=password + +########## memory ########## +# System memory high watermark ratio +--system_memory_high_watermark_ratio=0.8 + +########## experimental feature ########## +# if use experimental features +--enable_experimental_feature=false + +######### Raft ######### +# Raft election timeout +--raft_heartbeat_interval_secs=30 +# RPC timeout for raft client (ms) +--raft_rpc_timeout_ms=500 +## recycle Raft WAL +--wal_ttl=14400 + +########## Disk ########## +# Root data path. Split by comma. e.g. --data_path=/disk1/path1/,/disk2/path2/ +# One path per Rocksdb instance. +--data_path=data/storage + +# Minimum reserved bytes of each data path +--minimum_reserved_bytes=268435456 + +# The default reserved bytes for one batch operation +--rocksdb_batch_size=4096 +# The default block cache size used in BlockBasedTable. +# The unit is MB. +--rocksdb_block_cache=4 +# The type of storage engine, `rocksdb', `memory', etc. +--engine_type=rocksdb + +# Compression algorithm, options: no,snappy,lz4,lz4hc,zlib,bzip2,zstd +# For the sake of binary compatibility, the default value is snappy. 
+# Recommend to use: +# * lz4 to gain more CPU performance, with the same compression ratio with snappy +# * zstd to occupy less disk space +# * lz4hc for the read-heavy write-light scenario +--rocksdb_compression=lz4 + +# Set different compressions for different levels +# For example, if --rocksdb_compression is snappy, +# "no:no:lz4:lz4::zstd" is identical to "no:no:lz4:lz4:snappy:zstd:snappy" +# In order to disable compression for level 0/1, set it to "no:no" +--rocksdb_compression_per_level= + +# Whether or not to enable rocksdb's statistics, disabled by default +--enable_rocksdb_statistics=false + +# Statslevel used by rocksdb to collection statistics, optional values are +# * kExceptHistogramOrTimers, disable timer stats, and skip histogram stats +# * kExceptTimers, Skip timer stats +# * kExceptDetailedTimers, Collect all stats except time inside mutex lock AND time spent on compression. +# * kExceptTimeForMutex, Collect all stats except the counters requiring to get time inside the mutex lock. +# * kAll, Collect all stats +--rocksdb_stats_level=kExceptHistogramOrTimers + +# Whether or not to enable rocksdb's prefix bloom filter, enabled by default. +--enable_rocksdb_prefix_filtering=true +# Whether or not to enable rocksdb's whole key bloom filter, disabled by default. +--enable_rocksdb_whole_key_filtering=false + +############## rocksdb Options ############## +# rocksdb DBOptions in json, each name and value of option is a string, given as "option_name":"option_value" separated by comma +--rocksdb_db_options={} +# rocksdb ColumnFamilyOptions in json, each name and value of option is string, given as "option_name":"option_value" separated by comma +--rocksdb_column_family_options={"write_buffer_size":"67108864","max_write_buffer_number":"4","max_bytes_for_level_base":"268435456"} +# rocksdb BlockBasedTableOptions in json, each name and value of option is string, given as "option_name":"option_value" separated by comma +--rocksdb_block_based_table_options={"block_size":"8192"} + +############## meta Options ############## +--meta_data_path=data/meta +--default_replica_factor=1 +--default_parts_num=100 diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 5d375d2bd14..02cbf67a2c8 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -2,6 +2,8 @@ # # This source code is licensed under Apache 2.0 License. +if(NOT ENABLE_STANDALONE_VERSION) + install( FILES nebula-storaged.service @@ -30,7 +32,7 @@ install( install( FILES - nebula.service + nebula-graphd.service PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ @@ -38,12 +40,29 @@ install( DESTINATION scripts COMPONENT - common + graph ) +else() + install( FILES - utils.sh + nebula-standalone.service + PERMISSIONS + OWNER_EXECUTE OWNER_WRITE OWNER_READ + GROUP_EXECUTE GROUP_READ + WORLD_EXECUTE WORLD_READ + DESTINATION + scripts + COMPONENT + graph +) + +endif() + +install( + FILES + nebula.service PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ @@ -56,7 +75,7 @@ install( install( FILES - nebula-graphd.service + utils.sh PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ @@ -64,7 +83,7 @@ install( DESTINATION scripts COMPONENT - graph + common ) install( diff --git a/scripts/nebula-standalone.service b/scripts/nebula-standalone.service new file mode 100644 index 00000000000..3405081cb0b --- /dev/null +++ b/scripts/nebula-standalone.service @@ -0,0 +1,6 @@ +#! 
/bin/bash + +SCRIPT_PATH=$(readlink -f $0) +SCRIPT_DIR=$(dirname ${SCRIPT_PATH}) +export USAGE_INFO="${0} [-v] [-c /path/to/conf] " +${SCRIPT_DIR}/nebula.service $@ standalone diff --git a/scripts/nebula.service b/scripts/nebula.service index 04f38c074f6..7fea4a2f977 100755 --- a/scripts/nebula.service +++ b/scripts/nebula.service @@ -77,6 +77,13 @@ fi ACTION=${1} TARGET=${2} +if [ -e ${INSTALL_ROOT}/bin/nebula-standalone ] +then + ALLTARGET=(standalone) +else + ALLTARGET=(metad graphd storaged) +fi + # Collect the daemons on which we perform the action on case ${TARGET} in metad) @@ -88,8 +95,11 @@ case ${TARGET} in storaged) TARGETS=(${TARGET}) ;; + standalone) + TARGETS=(${TARGET}) + ;; all) - TARGETS=(metad graphd storaged) + TARGETS=${ALLTARGET[*]} ;; *) ERROR "Unknown daemon \`${DAEMON}'" diff --git a/src/clients/storage/GeneralStorageClient.h b/src/clients/storage/GeneralStorageClient.h new file mode 100644 index 00000000000..805c37d7a90 --- /dev/null +++ b/src/clients/storage/GeneralStorageClient.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef CLIENTS_STORAGE_GENERALSTORAGECLIENT_H_ +#define CLIENTS_STORAGE_GENERALSTORAGECLIENT_H_ + +#include + +#include "clients/meta/MetaClient.h" +#include "clients/storage/StorageClientBase.h" +#include "common/base/Base.h" +#include "common/datatypes/KeyValue.h" +#include "common/thrift/ThriftClientManager.h" +#include "interface/gen-cpp2/GeneralStorageServiceAsyncClient.h" + +namespace nebula { +namespace storage { + +/** + * A wrapper class for GeneralStorageServiceAsyncClient thrift API + * + * The class is NOT reentrant + */ +class GeneralStorageClient + : public StorageClientBase< + cpp2::GeneralStorageServiceAsyncClient, + thrift::ThriftClientManager> { + using Parent = + StorageClientBase>; + + public: + GeneralStorageClient(std::shared_ptr ioThreadPool, + meta::MetaClient* metaClient) + : Parent(ioThreadPool, metaClient) {} + virtual ~GeneralStorageClient() {} + + folly::SemiFuture> get(GraphSpaceID space, + std::vector&& keys, + bool returnPartly = false, + folly::EventBase* evb = nullptr); + + folly::SemiFuture> put(GraphSpaceID space, + std::vector kvs, + folly::EventBase* evb = nullptr); + + folly::SemiFuture> remove(GraphSpaceID space, + std::vector keys, + folly::EventBase* evb = nullptr); +}; + +} // namespace storage +} // namespace nebula +#endif // CLIENTS_STORAGE_GENERALSTORAGECLIENT_H_ diff --git a/src/clients/storage/InternalStorageClient.h b/src/clients/storage/InternalStorageClient.h index fe16b78c0a0..f34311f605a 100644 --- a/src/clients/storage/InternalStorageClient.h +++ b/src/clients/storage/InternalStorageClient.h @@ -11,6 +11,7 @@ #include "clients/storage/StorageClientBase.h" #include "common/base/Base.h" #include "common/base/ErrorOr.h" +#include "common/thrift/ThriftClientManager.h" #include "interface/gen-cpp2/InternalStorageServiceAsyncClient.h" namespace nebula { @@ -21,8 +22,13 @@ namespace storage { * * The class is NOT reentrant */ -class InternalStorageClient : public StorageClientBase { - using Parent = StorageClientBase; +class InternalStorageClient + : public StorageClientBase< + cpp2::InternalStorageServiceAsyncClient, + thrift::ThriftClientManager> { + using Parent = + StorageClientBase>; public: InternalStorageClient(std::shared_ptr ioThreadPool, diff --git a/src/clients/storage/StorageClient.cpp b/src/clients/storage/StorageClient.cpp index 41cb0928715..61381598225 100644 --- 
a/src/clients/storage/StorageClient.cpp +++ b/src/clients/storage/StorageClient.cpp @@ -100,12 +100,11 @@ StorageRpcRespFuture StorageClient::getNeighbors( req.traverse_spec_ref() = std::move(spec); } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::GetNeighborsRequest& r) { - return client->future_getNeighbors(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::GetNeighborsRequest& r) { + return client->future_getNeighbors(r); + }); } StorageRpcRespFuture StorageClient::addVertices( @@ -140,12 +139,11 @@ StorageRpcRespFuture StorageClient::addVertices( req.common_ref() = common; } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::AddVerticesRequest& r) { - return client->future_addVertices(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::AddVerticesRequest& r) { + return client->future_addVertices(r); + }); } StorageRpcRespFuture StorageClient::addEdges(const CommonRequestParam& param, @@ -178,13 +176,13 @@ StorageRpcRespFuture StorageClient::addEdges(const CommonReq req.prop_names_ref() = propNames; req.common_ref() = common; } - return collectResponse( - param.evb, - std::move(requests), - [useToss = param.useExperimentalFeature](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::AddEdgesRequest& r) { - return useToss ? client->future_chainAddEdges(r) : client->future_addEdges(r); - }); + return collectResponse(param.evb, + std::move(requests), + [useToss = param.useExperimentalFeature](ThriftClientType* client, + const cpp2::AddEdgesRequest& r) { + return useToss ? client->future_chainAddEdges(r) + : client->future_addEdges(r); + }); } StorageRpcRespFuture StorageClient::getProps( @@ -237,10 +235,10 @@ StorageRpcRespFuture StorageClient::getProps( req.common_ref() = common; } - return collectResponse(param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::GetPropRequest& r) { return client->future_getProps(r); }); + return collectResponse( + param.evb, std::move(requests), [](ThriftClientType* client, const cpp2::GetPropRequest& r) { + return client->future_getProps(r); + }); } StorageRpcRespFuture StorageClient::deleteEdges( @@ -268,12 +266,11 @@ StorageRpcRespFuture StorageClient::deleteEdges( req.common_ref() = common; } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::DeleteEdgesRequest& r) { - return client->future_deleteEdges(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::DeleteEdgesRequest& r) { + return client->future_deleteEdges(r); + }); } StorageRpcRespFuture StorageClient::deleteVertices( @@ -301,12 +298,11 @@ StorageRpcRespFuture StorageClient::deleteVertices( req.common_ref() = common; } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::DeleteVerticesRequest& r) { - return client->future_deleteVertices(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::DeleteVerticesRequest& r) { + return client->future_deleteVertices(r); + }); } StorageRpcRespFuture StorageClient::deleteTags( @@ -334,12 +330,11 @@ StorageRpcRespFuture StorageClient::deleteTags( req.common_ref() = common; } - 
return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::DeleteTagsRequest& r) { - return client->future_deleteTags(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::DeleteTagsRequest& r) { + return client->future_deleteTags(r); + }); } folly::Future> StorageClient::updateVertex( @@ -388,12 +383,11 @@ folly::Future> StorageClient::updateVert } request.second = std::move(req); - return getResponse( - param.evb, - std::move(request), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::UpdateVertexRequest& r) { - return client->future_updateVertex(r); - }); + return getResponse(param.evb, + std::move(request), + [](ThriftClientType* client, const cpp2::UpdateVertexRequest& r) { + return client->future_updateVertex(r); + }); } folly::Future> StorageClient::updateEdge( @@ -441,14 +435,13 @@ folly::Future> StorageClient::updateEdge } request.second = std::move(req); - return getResponse( - param.evb, - std::move(request), - [useExperimentalFeature = param.useExperimentalFeature]( - cpp2::GraphStorageServiceAsyncClient* client, const cpp2::UpdateEdgeRequest& r) { - return useExperimentalFeature ? client->future_chainUpdateEdge(r) - : client->future_updateEdge(r); - }); + return getResponse(param.evb, + std::move(request), + [useExperimentalFeature = param.useExperimentalFeature]( + ThriftClientType* client, const cpp2::UpdateEdgeRequest& r) { + return useExperimentalFeature ? client->future_chainUpdateEdge(r) + : client->future_updateEdge(r); + }); } folly::Future> StorageClient::getUUID(GraphSpaceID space, @@ -478,11 +471,10 @@ folly::Future> StorageClient::getUUID(GraphSpaceID s req.name_ref() = name; request.second = std::move(req); - return getResponse(evb, - std::move(request), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::GetUUIDReq& r) { - return client->future_getUUID(r); - }); + return getResponse( + evb, std::move(request), [](ThriftClientType* client, const cpp2::GetUUIDReq& r) { + return client->future_getUUID(r); + }); } StorageRpcRespFuture StorageClient::lookupIndex( @@ -524,12 +516,11 @@ StorageRpcRespFuture StorageClient::lookupIndex( req.limit_ref() = limit; } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::LookupIndexRequest& r) { - return client->future_lookupIndex(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::LookupIndexRequest& r) { + return client->future_lookupIndex(r); + }); } StorageRpcRespFuture StorageClient::lookupAndTraverse( @@ -554,12 +545,11 @@ StorageRpcRespFuture StorageClient::lookupAndTravers req.common_ref() = common; } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::LookupAndTraverseRequest& r) { - return client->future_lookupAndTraverse(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::LookupAndTraverseRequest& r) { + return client->future_lookupAndTraverse(r); + }); } StorageRpcRespFuture StorageClient::scanEdge( @@ -587,10 +577,10 @@ StorageRpcRespFuture StorageClient::scanEdge( req.common_ref() = param.toReqCommon(); } - return collectResponse(param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::ScanEdgeRequest& r) { return client->future_scanEdge(r); }); + return 
collectResponse( + param.evb, std::move(requests), [](ThriftClientType* client, const cpp2::ScanEdgeRequest& r) { + return client->future_scanEdge(r); + }); } StorageRpcRespFuture StorageClient::scanVertex( @@ -618,12 +608,11 @@ StorageRpcRespFuture StorageClient::scanVertex( req.common_ref() = param.toReqCommon(); } - return collectResponse( - param.evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, const cpp2::ScanVertexRequest& r) { - return client->future_scanVertex(r); - }); + return collectResponse(param.evb, + std::move(requests), + [](ThriftClientType* client, const cpp2::ScanVertexRequest& r) { + return client->future_scanVertex(r); + }); } folly::SemiFuture> StorageClient::get( @@ -646,10 +635,10 @@ folly::SemiFuture> StorageClient::get( req.return_partly_ref() = returnPartly; } - return collectResponse(evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::KVGetRequest& r) { return client->future_get(r); }); + return collectResponse( + evb, std::move(requests), [](ThriftClientType* client, const cpp2::KVGetRequest& r) { + return client->future_get(r); + }); } folly::SemiFuture> StorageClient::put( @@ -671,10 +660,10 @@ folly::SemiFuture> StorageClient::put( req.parts_ref() = std::move(c.second); } - return collectResponse(evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::KVPutRequest& r) { return client->future_put(r); }); + return collectResponse( + evb, std::move(requests), [](ThriftClientType* client, const cpp2::KVPutRequest& r) { + return client->future_put(r); + }); } folly::SemiFuture> StorageClient::remove( @@ -696,10 +685,10 @@ folly::SemiFuture> StorageClient::remove( req.parts_ref() = std::move(c.second); } - return collectResponse(evb, - std::move(requests), - [](cpp2::GraphStorageServiceAsyncClient* client, - const cpp2::KVRemoveRequest& r) { return client->future_remove(r); }); + return collectResponse( + evb, std::move(requests), [](ThriftClientType* client, const cpp2::KVRemoveRequest& r) { + return client->future_remove(r); + }); } StatusOr> StorageClient::getIdFromRow( diff --git a/src/clients/storage/StorageClient.h b/src/clients/storage/StorageClient.h index ae2fd3d4079..40c0dcb3aca 100644 --- a/src/clients/storage/StorageClient.h +++ b/src/clients/storage/StorageClient.h @@ -9,7 +9,10 @@ #include "clients/storage/StorageClientBase.h" #include "common/base/Base.h" +#include "common/thrift/ThriftClientManager.h" +#include "common/thrift/ThriftLocalClientManager.h" #include "interface/gen-cpp2/GraphStorageServiceAsyncClient.h" +#include "storage/GraphStorageLocalServer.h" namespace nebula { namespace storage { @@ -22,7 +25,18 @@ using StorageRpcRespFuture = folly::SemiFuture>; * * The class is NOT reentrant */ -class StorageClient : public StorageClientBase { +#ifndef BUILD_STANDALONE +using ThriftClientType = cpp2::GraphStorageServiceAsyncClient; +template +using ThriftClientManType = thrift::ThriftClientManager; +#else +using ThriftClientType = GraphStorageLocalServer; +template +using ThriftClientManType = thrift::LocalClientManager; + +#endif +class StorageClient + : public StorageClientBase> { FRIEND_TEST(StorageClientTest, LeaderChangeTest); public: @@ -46,7 +60,8 @@ class StorageClient : public StorageClientBase ioThreadPool, meta::MetaClient* metaClient) - : StorageClientBase(ioThreadPool, metaClient) {} + : StorageClientBase>(ioThreadPool, + metaClient) {} virtual ~StorageClient() {} StorageRpcRespFuture getNeighbors( diff --git 
a/src/clients/storage/StorageClientBase-inl.h b/src/clients/storage/StorageClientBase-inl.h index abeb3ed130c..6341e6dd74e 100644 --- a/src/clients/storage/StorageClientBase-inl.h +++ b/src/clients/storage/StorageClientBase-inl.h @@ -70,50 +70,52 @@ struct ResponseContext { bool fulfilled_{false}; }; -template -StorageClientBase::StorageClientBase( +template +StorageClientBase::StorageClientBase( std::shared_ptr threadPool, meta::MetaClient* metaClient) : metaClient_(metaClient), ioThreadPool_(threadPool) { - clientsMan_ = std::make_unique>(FLAGS_enable_ssl); + clientsMan_ = std::make_unique(FLAGS_enable_ssl); } -template -StorageClientBase::~StorageClientBase() { +template +StorageClientBase::~StorageClientBase() { VLOG(3) << "Destructing StorageClientBase"; if (nullptr != metaClient_) { metaClient_ = nullptr; } } -template -StatusOr StorageClientBase::getLeader(GraphSpaceID spaceId, - PartitionID partId) const { +template +StatusOr StorageClientBase::getLeader( + GraphSpaceID spaceId, PartitionID partId) const { return metaClient_->getStorageLeaderFromCache(spaceId, partId); } -template -void StorageClientBase::updateLeader(GraphSpaceID spaceId, - PartitionID partId, - const HostAddr& leader) { +template +void StorageClientBase::updateLeader(GraphSpaceID spaceId, + PartitionID partId, + const HostAddr& leader) { metaClient_->updateStorageLeader(spaceId, partId, leader); } -template -void StorageClientBase::invalidLeader(GraphSpaceID spaceId, PartitionID partId) { +template +void StorageClientBase::invalidLeader(GraphSpaceID spaceId, + PartitionID partId) { metaClient_->invalidStorageLeader(spaceId, partId); } -template -void StorageClientBase::invalidLeader(GraphSpaceID spaceId, - std::vector& partsId) { +template +void StorageClientBase::invalidLeader( + GraphSpaceID spaceId, std::vector& partsId) { for (const auto& partId : partsId) { invalidLeader(spaceId, partId); } } -template +template template -folly::SemiFuture> StorageClientBase::collectResponse( +folly::SemiFuture> +StorageClientBase::collectResponse( folly::EventBase* evb, std::unordered_map requests, RemoteFunc&& remoteFunc) { @@ -214,9 +216,9 @@ folly::SemiFuture> StorageClientBase::c return context->promise.getSemiFuture(); } -template +template template -folly::Future> StorageClientBase::getResponse( +folly::Future> StorageClientBase::getResponse( folly::EventBase* evb, std::pair&& request, RemoteFunc&& remoteFunc) { auto pro = std::make_shared>>(); auto f = pro->getFuture(); @@ -225,9 +227,9 @@ folly::Future> StorageClientBase::getResponse( return f; } -template +template template -void StorageClientBase::getResponseImpl( +void StorageClientBase::getResponseImpl( folly::EventBase* evb, std::pair request, RemoteFunc remoteFunc, @@ -291,14 +293,14 @@ void StorageClientBase::getResponseImpl( }); // via } -template +template template StatusOr>>> -StorageClientBase::clusterIdsToHosts(GraphSpaceID spaceId, - const Container& ids, - GetIdFunc f) const { +StorageClientBase::clusterIdsToHosts(GraphSpaceID spaceId, + const Container& ids, + GetIdFunc f) const { std::unordered_map>> clusters; @@ -330,9 +332,9 @@ StorageClientBase::clusterIdsToHosts(GraphSpaceID spaceId, return clusters; } -template +template StatusOr>> -StorageClientBase::getHostParts(GraphSpaceID spaceId) const { +StorageClientBase::getHostParts(GraphSpaceID spaceId) const { std::unordered_map> hostParts; auto status = metaClient_->partsNum(spaceId); if (!status.ok()) { @@ -350,9 +352,10 @@ StorageClientBase::getHostParts(GraphSpaceID spaceId) const { return 
hostParts; } -template +template StatusOr>> -StorageClientBase::getHostPartsWithCursor(GraphSpaceID spaceId) const { +StorageClientBase::getHostPartsWithCursor( + GraphSpaceID spaceId) const { std::unordered_map> hostParts; auto status = metaClient_->partsNum(spaceId); if (!status.ok()) { diff --git a/src/clients/storage/StorageClientBase.h b/src/clients/storage/StorageClientBase.h index 3ad810d4802..c86ee653207 100644 --- a/src/clients/storage/StorageClientBase.h +++ b/src/clients/storage/StorageClientBase.h @@ -116,7 +116,7 @@ class StorageRpcResponse final { /** * A base class for all storage clients */ -template +template class StorageClientBase { public: StatusOr getLeader(GraphSpaceID spaceId, PartitionID partId) const; @@ -220,7 +220,7 @@ class StorageClientBase { private: std::shared_ptr ioThreadPool_; - std::unique_ptr> clientsMan_; + std::unique_ptr clientsMan_; }; } // namespace storage diff --git a/src/common/thrift/ThriftClientManager-inl.h b/src/common/thrift/ThriftClientManager-inl.h index f823d43a48e..4b6dfd0ef6f 100644 --- a/src/common/thrift/ThriftClientManager-inl.h +++ b/src/common/thrift/ThriftClientManager-inl.h @@ -10,6 +10,7 @@ #include #include +#include "common/base/Base.h" #include "common/network/NetworkUtils.h" #include "common/ssl/SSLConfig.h" @@ -17,7 +18,6 @@ DECLARE_int32(conn_timeout_ms); namespace nebula { namespace thrift { - template std::shared_ptr ThriftClientManager::client(const HostAddr& host, folly::EventBase* evb, diff --git a/src/common/thrift/ThriftLocalClientManager.h b/src/common/thrift/ThriftLocalClientManager.h new file mode 100644 index 00000000000..c7229937056 --- /dev/null +++ b/src/common/thrift/ThriftLocalClientManager.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ +#pragma once + +#include +#include + +#include "common/base/Base.h" +#include "common/datatypes/HostAddr.h" +namespace nebula { +namespace thrift { + +template +class LocalClientManager final { + public: + std::shared_ptr client(const HostAddr& host, + folly::EventBase* evb = nullptr, + bool compatibility = false, + uint32_t timeout = 0) { + UNUSED(host); + UNUSED(evb); + UNUSED(compatibility); + UNUSED(timeout); + return ClientType::getInstance(); + } + + ~LocalClientManager() { + VLOG(3) << "~LocalClientManager"; + } + + explicit LocalClientManager(bool enableSSL = false) { + UNUSED(enableSSL); + VLOG(3) << "LocalClientManager"; + } +}; +} // namespace thrift +} // namespace nebula diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt index cc0530863f7..e0b3ec19011 100644 --- a/src/daemons/CMakeLists.txt +++ b/src/daemons/CMakeLists.txt @@ -27,7 +27,6 @@ set(common_deps $ $ $ - $ $ $ $ @@ -57,6 +56,8 @@ set(storage_meta_deps $ ) +if(NOT ENABLE_STANDALONE_VERSION) + nebula_add_executable( NAME nebula-storaged @@ -89,6 +90,7 @@ nebula_add_executable( nebula-metad SOURCES MetaDaemon.cpp + MetaDaemonInit.cpp SetupLogging.cpp SetupBreakpad.cpp OBJECTS @@ -186,3 +188,77 @@ install( COMPONENT meta ) + +else() + +nebula_add_executable( + NAME + nebula-standalone + SOURCES + StandAloneDaemon.cpp + MetaDaemonInit.cpp + SetupLogging.cpp + SetupBreakpad.cpp + OBJECTS + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + ${storage_meta_deps} + ${common_deps} + LIBRARIES + ${ROCKSDB_LIBRARIES} + ${PROXYGEN_LIBRARIES} + ${THRIFT_LIBRARIES} + wangle +) + +install( + TARGETS + nebula-standalone + PERMISSIONS + OWNER_EXECUTE OWNER_WRITE OWNER_READ + GROUP_EXECUTE GROUP_READ + WORLD_EXECUTE WORLD_READ + DESTINATION + bin + COMPONENT + graph ## tmp use graph component +) + +endif() diff --git a/src/daemons/MetaDaemon.cpp b/src/daemons/MetaDaemon.cpp index 06dd0a9c9b8..11960bc7794 100644 --- a/src/daemons/MetaDaemon.cpp +++ b/src/daemons/MetaDaemon.cpp @@ -6,6 +6,7 @@ #include #include +#include "MetaDaemonInit.h" #include "common/base/Base.h" #include "common/base/SignalHandler.h" #include "common/fs/FileUtils.h" @@ -38,22 +39,16 @@ using nebula::ProcessUtils; using nebula::Status; using nebula::StatusOr; using nebula::network::NetworkUtils; -using nebula::web::PathParams; DEFINE_string(local_ip, "", "Local ip specified for NetworkUtils::getLocalIP"); DEFINE_int32(port, 45500, "Meta daemon listening port"); DEFINE_bool(reuse_port, true, "Whether to turn on the SO_REUSEPORT option"); -DEFINE_string(data_path, "", "Root data path"); -DEFINE_string(meta_server_addrs, - "", - "It is a list of IPs split by comma, used in cluster deployment" - "the ips number is equal to the replica number." 
- "If empty, it means it's a single node"); +DECLARE_string(data_path); +DECLARE_string(meta_server_addrs); + // DEFINE_string(local_ip, "", "Local ip specified for // NetworkUtils::getLocalIP"); -DEFINE_int32(num_io_threads, 16, "Number of IO threads"); DEFINE_int32(meta_http_thread_num, 3, "Number of meta daemon's http thread"); -DEFINE_int32(num_worker_threads, 32, "Number of workers"); DEFINE_string(pid_file, "pids/nebula-metad.pid", "File to hold the process id"); DEFINE_bool(daemonize, true, "Whether run as a daemon process"); @@ -68,133 +63,6 @@ extern Status setupLogging(); extern Status setupBreakpad(); #endif -namespace nebula { -namespace meta { -const std::string kClusterIdKey = "__meta_cluster_id_key__"; // NOLINT -} // namespace meta -} // namespace nebula - -nebula::ClusterID gClusterId = 0; - -std::unique_ptr initKV(std::vector peers, - nebula::HostAddr localhost) { - auto partMan = std::make_unique(); - // The meta server has only one space (0), one part (0) - partMan->addPart(nebula::kDefaultSpaceId, nebula::kDefaultPartId, std::move(peers)); - // folly IOThreadPoolExecutor - auto ioPool = std::make_shared(FLAGS_num_io_threads); - std::shared_ptr threadManager( - apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager( - FLAGS_num_worker_threads)); - threadManager->setNamePrefix("executor"); - threadManager->start(); - nebula::kvstore::KVOptions options; - - auto absolute = boost::filesystem::absolute(FLAGS_data_path); - options.dataPaths_ = {absolute.string()}; - - options.partMan_ = std::move(partMan); - auto kvstore = std::make_unique( - std::move(options), ioPool, localhost, threadManager); - if (!(kvstore->init())) { - LOG(ERROR) << "Nebula store init failed"; - return nullptr; - } - - LOG(INFO) << "Waiting for the leader elected..."; - nebula::HostAddr leader; - while (true) { - auto ret = kvstore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); - if (!nebula::ok(ret)) { - LOG(ERROR) << "Nebula store init failed"; - return nullptr; - } - leader = nebula::value(ret); - if (leader != nebula::HostAddr("", 0)) { - break; - } - LOG(INFO) << "Leader has not been elected, sleep 1s"; - sleep(1); - } - - gClusterId = - nebula::meta::ClusterIdMan::getClusterIdFromKV(kvstore.get(), nebula::meta::kClusterIdKey); - if (gClusterId == 0) { - if (leader == localhost) { - LOG(INFO) << "I am leader, create cluster Id"; - gClusterId = nebula::meta::ClusterIdMan::create(FLAGS_meta_server_addrs); - if (!nebula::meta::ClusterIdMan::persistInKV( - kvstore.get(), nebula::meta::kClusterIdKey, gClusterId)) { - LOG(ERROR) << "Persist cluster failed!"; - return nullptr; - } - } else { - LOG(INFO) << "I am follower, wait for the leader's clusterId"; - while (gClusterId == 0) { - LOG(INFO) << "Waiting for the leader's clusterId"; - sleep(1); - gClusterId = nebula::meta::ClusterIdMan::getClusterIdFromKV(kvstore.get(), - nebula::meta::kClusterIdKey); - } - } - } - - auto version = nebula::meta::MetaVersionMan::getMetaVersionFromKV(kvstore.get()); - LOG(INFO) << "Get meta version is " << static_cast(version); - if (version == nebula::meta::MetaVersion::UNKNOWN) { - LOG(ERROR) << "Meta version is invalid"; - return nullptr; - } else if (version == nebula::meta::MetaVersion::V1) { - if (leader == localhost) { - LOG(INFO) << "I am leader, begin upgrade meta data"; - // need to upgrade the v1.0 meta data format to v2.0 meta data format - auto ret = nebula::meta::MetaVersionMan::updateMetaV1ToV2(kvstore.get()); - if (!ret.ok()) { - LOG(ERROR) << ret; - return nullptr; - 
} - } else { - LOG(INFO) << "I am follower, wait for leader to sync upgrade"; - while (version != nebula::meta::MetaVersion::V2) { - VLOG(1) << "Waiting for leader to upgrade"; - sleep(1); - version = nebula::meta::MetaVersionMan::getMetaVersionFromKV(kvstore.get()); - } - } - } - - if (leader == localhost) { - nebula::meta::MetaVersionMan::setMetaVersionToKV(kvstore.get()); - } - - LOG(INFO) << "Nebula store init succeeded, clusterId " << gClusterId; - return kvstore; -} - -Status initWebService(nebula::WebService* svc, - nebula::kvstore::KVStore* kvstore, - nebula::hdfs::HdfsCommandHelper* helper, - nebula::thread::GenericThreadPool* pool) { - LOG(INFO) << "Starting Meta HTTP Service"; - auto& router = svc->router(); - router.get("/download-dispatch").handler([kvstore, helper, pool](PathParams&&) { - auto handler = new nebula::meta::MetaHttpDownloadHandler(); - handler->init(kvstore, helper, pool); - return handler; - }); - router.get("/ingest-dispatch").handler([kvstore, pool](PathParams&&) { - auto handler = new nebula::meta::MetaHttpIngestHandler(); - handler->init(kvstore, pool); - return handler; - }); - router.get("/replace").handler([kvstore](PathParams&&) { - auto handler = new nebula::meta::MetaHttpReplaceHostHandler(); - handler->init(kvstore); - return handler; - }); - return svc->start(); -} - int main(int argc, char* argv[]) { google::SetVersionString(nebula::versionString()); // Detect if the server has already been started @@ -343,7 +211,8 @@ int main(int argc, char* argv[]) { return EXIT_FAILURE; } - auto handler = std::make_shared(gKVStore.get(), gClusterId); + auto handler = + std::make_shared(gKVStore.get(), metaClusterId()); LOG(INFO) << "The meta daemon start on " << localhost; try { gServer = std::make_unique(); diff --git a/src/daemons/MetaDaemonInit.cpp b/src/daemons/MetaDaemonInit.cpp new file mode 100644 index 00000000000..c41b1e02c06 --- /dev/null +++ b/src/daemons/MetaDaemonInit.cpp @@ -0,0 +1,193 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "MetaDaemonInit.h" + +#include +#include + +#include "common/base/Base.h" +#include "common/base/SignalHandler.h" +#include "common/fs/FileUtils.h" +#include "common/hdfs/HdfsCommandHelper.h" +#include "common/hdfs/HdfsHelper.h" +#include "common/network/NetworkUtils.h" +#include "common/ssl/SSLConfig.h" +#include "common/thread/GenericThreadPool.h" +#include "common/utils/MetaKeyUtils.h" +#include "kvstore/NebulaStore.h" +#include "kvstore/PartManager.h" +#include "meta/ActiveHostsMan.h" +#include "meta/KVBasedClusterIdMan.h" +#include "meta/MetaServiceHandler.h" +#include "meta/MetaVersionMan.h" +#include "meta/http/MetaHttpDownloadHandler.h" +#include "meta/http/MetaHttpIngestHandler.h" +#include "meta/http/MetaHttpReplaceHostHandler.h" +#include "meta/processors/job/JobManager.h" +#include "meta/stats/MetaStats.h" +#include "webservice/Router.h" +#include "webservice/WebService.h" + +#ifndef BUILD_STANDALONE +DEFINE_int32(num_io_threads, 16, "Number of IO threads"); +DEFINE_int32(num_worker_threads, 32, "Number of workers"); +DEFINE_string(data_path, "", "Root data path"); +DEFINE_string(meta_server_addrs, + "", + "It is a list of IPs split by comma, used in cluster deployment" + "the ips number is equal to the replica number." 
+ "If empty, it means it's a single node"); +#else +DEFINE_int32(meta_num_io_threads, 16, "Number of IO threads"); +DEFINE_int32(meta_num_worker_threads, 32, "Number of workers"); +DEFINE_string(meta_data_path, "", "Root data path"); +DECLARE_string(meta_server_addrs); // use define from grap flags. +DECLARE_int32(ws_meta_http_port); +DECLARE_int32(ws_meta_h2_port); +#endif + +using nebula::web::PathParams; + +namespace nebula::meta { +const std::string kClusterIdKey = "__meta_cluster_id_key__"; // NOLINT +} // namespace nebula::meta + +nebula::ClusterID gClusterId = 0; +nebula::ClusterID& metaClusterId() { + return gClusterId; +} + +std::unique_ptr initKV(std::vector peers, + nebula::HostAddr localhost) { + auto partMan = std::make_unique(); + // The meta server has only one space (0), one part (0) + partMan->addPart(nebula::kDefaultSpaceId, nebula::kDefaultPartId, std::move(peers)); +#ifndef BUILD_STANDALONE + int32_t numMetaIoThreads = FLAGS_num_io_threads; + int32_t numMetaWorkerThreads = FLAGS_num_worker_threads; +#else + int32_t numMetaIoThreads = FLAGS_meta_num_io_threads; + int32_t numMetaWorkerThreads = FLAGS_meta_num_worker_threads; +#endif + // folly IOThreadPoolExecutor + auto ioPool = std::make_shared(numMetaIoThreads); + std::shared_ptr threadManager( + apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager( + numMetaWorkerThreads)); + threadManager->setNamePrefix("executor"); + threadManager->start(); + nebula::kvstore::KVOptions options; +#ifndef BUILD_STANDALONE + auto absolute = boost::filesystem::absolute(FLAGS_data_path); +#else + auto absolute = boost::filesystem::absolute(FLAGS_meta_data_path); +#endif + options.dataPaths_ = {absolute.string()}; + options.partMan_ = std::move(partMan); + auto kvstore = std::make_unique( + std::move(options), ioPool, localhost, threadManager); + if (!(kvstore->init())) { + LOG(ERROR) << "Nebula store init failed"; + return nullptr; + } + + LOG(INFO) << "Waiting for the leader elected..."; + nebula::HostAddr leader; + while (true) { + auto ret = kvstore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); + if (!nebula::ok(ret)) { + LOG(ERROR) << "Nebula store init failed"; + return nullptr; + } + leader = nebula::value(ret); + if (leader != nebula::HostAddr("", 0)) { + break; + } + LOG(INFO) << "Leader has not been elected, sleep 1s"; + sleep(1); + } + + gClusterId = + nebula::meta::ClusterIdMan::getClusterIdFromKV(kvstore.get(), nebula::meta::kClusterIdKey); + if (gClusterId == 0) { + if (leader == localhost) { + LOG(INFO) << "I am leader, create cluster Id"; + gClusterId = nebula::meta::ClusterIdMan::create(FLAGS_meta_server_addrs); + if (!nebula::meta::ClusterIdMan::persistInKV( + kvstore.get(), nebula::meta::kClusterIdKey, gClusterId)) { + LOG(ERROR) << "Persist cluster failed!"; + return nullptr; + } + } else { + LOG(INFO) << "I am follower, wait for the leader's clusterId"; + while (gClusterId == 0) { + LOG(INFO) << "Waiting for the leader's clusterId"; + sleep(1); + gClusterId = nebula::meta::ClusterIdMan::getClusterIdFromKV(kvstore.get(), + nebula::meta::kClusterIdKey); + } + } + } + + auto version = nebula::meta::MetaVersionMan::getMetaVersionFromKV(kvstore.get()); + LOG(INFO) << "Get meta version is " << static_cast(version); + if (version == nebula::meta::MetaVersion::UNKNOWN) { + LOG(ERROR) << "Meta version is invalid"; + return nullptr; + } else if (version == nebula::meta::MetaVersion::V1) { + if (leader == localhost) { + LOG(INFO) << "I am leader, begin upgrade meta data"; + // need to 
upgrade the v1.0 meta data format to v2.0 meta data format + auto ret = nebula::meta::MetaVersionMan::updateMetaV1ToV2(kvstore.get()); + if (!ret.ok()) { + LOG(ERROR) << ret; + return nullptr; + } + } else { + LOG(INFO) << "I am follower, wait for leader to sync upgrade"; + while (version != nebula::meta::MetaVersion::V2) { + VLOG(1) << "Waiting for leader to upgrade"; + sleep(1); + version = nebula::meta::MetaVersionMan::getMetaVersionFromKV(kvstore.get()); + } + } + } + + if (leader == localhost) { + nebula::meta::MetaVersionMan::setMetaVersionToKV(kvstore.get()); + } + + LOG(INFO) << "Nebula store init succeeded, clusterId " << gClusterId; + return kvstore; +} + +nebula::Status initWebService(nebula::WebService* svc, + nebula::kvstore::KVStore* kvstore, + nebula::hdfs::HdfsCommandHelper* helper, + nebula::thread::GenericThreadPool* pool) { + LOG(INFO) << "Starting Meta HTTP Service"; + auto& router = svc->router(); + router.get("/download-dispatch").handler([kvstore, helper, pool](PathParams&&) { + auto handler = new nebula::meta::MetaHttpDownloadHandler(); + handler->init(kvstore, helper, pool); + return handler; + }); + router.get("/ingest-dispatch").handler([kvstore, pool](PathParams&&) { + auto handler = new nebula::meta::MetaHttpIngestHandler(); + handler->init(kvstore, pool); + return handler; + }); + router.get("/replace").handler([kvstore](PathParams&&) { + auto handler = new nebula::meta::MetaHttpReplaceHostHandler(); + handler->init(kvstore); + return handler; + }); +#ifndef BUILD_STANDALONE + return svc->start(); +#else + return svc->start(FLAGS_ws_meta_http_port, FLAGS_ws_meta_h2_port); +#endif +} diff --git a/src/daemons/MetaDaemonInit.h b/src/daemons/MetaDaemonInit.h new file mode 100644 index 00000000000..02dfa6f4eb4 --- /dev/null +++ b/src/daemons/MetaDaemonInit.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2019 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ +#pragma once + +#include + +#include "common/base/Status.h" +#include "common/hdfs/HdfsCommandHelper.h" +#include "kvstore/KVStore.h" +#include "webservice/WebService.h" + +nebula::ClusterID& metaClusterId(); + +std::unique_ptr initKV(std::vector peers, + nebula::HostAddr localhost); + +nebula::Status initWebService(nebula::WebService* svc, + nebula::kvstore::KVStore* kvstore, + nebula::hdfs::HdfsCommandHelper* helper, + nebula::thread::GenericThreadPool* pool); diff --git a/src/daemons/StandAloneDaemon.cpp b/src/daemons/StandAloneDaemon.cpp new file mode 100644 index 00000000000..8ea8ea1fcb5 --- /dev/null +++ b/src/daemons/StandAloneDaemon.cpp @@ -0,0 +1,444 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include "MetaDaemonInit.h" +#include "common/base/Base.h" +#include "common/base/SignalHandler.h" +#include "common/fs/FileUtils.h" +#include "common/hdfs/HdfsCommandHelper.h" +#include "common/network/NetworkUtils.h" +#include "common/process/ProcessUtils.h" +#include "common/ssl/SSLConfig.h" +#include "common/time/TimezoneInfo.h" +#include "common/utils/MetaKeyUtils.h" +#include "folly/ScopeGuard.h" +#include "graph/service/GraphFlags.h" +#include "graph/service/GraphService.h" +#include "graph/stats/StatsDef.h" +#include "meta/MetaServiceHandler.h" +#include "meta/MetaVersionMan.h" +#include "meta/RootUserMan.h" +#include "meta/http/MetaHttpDownloadHandler.h" +#include "meta/http/MetaHttpIngestHandler.h" +#include "meta/http/MetaHttpReplaceHostHandler.h" +#include "meta/processors/job/JobManager.h" +#include "storage/StorageServer.h" +#include "version/Version.h" +#include "webservice/WebService.h" + +using nebula::fs::FileUtils; +using nebula::graph::GraphService; +using nebula::operator<<; +using nebula::HostAddr; +using nebula::ProcessUtils; +using nebula::Status; +using nebula::StatusOr; +using nebula::network::NetworkUtils; + +void setupThreadManager(); +void printHelp(const char *prog); +void stopAllDaemon(); +static void signalHandler(int sig); +static Status setupSignalHandler(); +extern Status setupLogging(); +#if defined(__x86_64__) +extern Status setupBreakpad(); +#endif + +std::unique_ptr gStorageServer; +static std::unique_ptr gServer; +static std::unique_ptr gMetaServer; +static std::unique_ptr gMetaKVStore; +std::mutex gServerGuard; + +// common flags +DECLARE_string(flagfile); +DECLARE_bool(containerized); +DECLARE_bool(reuse_port); +DECLARE_string(meta_server_addrs); + +// storage gflags +DEFINE_string(data_path, + "", + "Root data path, multiple paths should be separated by commas. " + "For the rocksdb engine, one path maps to one instance."); +DEFINE_string(wal_path, + "", + "Nebula wal path. By default, wal will be stored as a sibling of " + "rocksdb data."); +DEFINE_string(listener_path, + "", + "Path for listener, only wal will be saved. "
+ "if it is not empty, data_path will not take effect."); +DECLARE_int32(storage_port); + +// meta gflags +DEFINE_int32(meta_http_thread_num, 3, "Number of meta daemon's http thread"); +DEFINE_int32(meta_port, 45500, "Meta daemon listening port"); + +int main(int argc, char *argv[]) { + google::SetVersionString(nebula::versionString()); + gflags::ParseCommandLineFlags(&argc, &argv, false); + + if (argc == 1) { + printHelp(argv[0]); + return EXIT_FAILURE; + } + if (argc == 2) { + if (::strcmp(argv[1], "-h") == 0) { + printHelp(argv[0]); + return EXIT_SUCCESS; + } + } + + folly::init(&argc, &argv, true); + if (FLAGS_enable_ssl || FLAGS_enable_graph_ssl || FLAGS_enable_meta_ssl) { + folly::ssl::init(); + } + nebula::initCounters(); + + // Setup logging + auto status = setupLogging(); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + +#if defined(__x86_64__) + status = setupBreakpad(); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } +#endif + + // Detect if the server has already been started + auto pidPath = FLAGS_pid_file; + status = ProcessUtils::isPidAvailable(pidPath); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + + if (FLAGS_daemonize) { + google::SetStderrLogging(google::FATAL); + } else { + google::SetStderrLogging(google::INFO); + } + + if (FLAGS_daemonize) { + status = ProcessUtils::daemonize(pidPath); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + } else { + // Write the current pid into the pid file + status = ProcessUtils::makePidFile(pidPath); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + } + + // Validate the IPv4 address or hostname + status = NetworkUtils::validateHostOrIp(FLAGS_local_ip); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + + // Initialize the global timezone, it's only used for datetime type compute + // won't affect the process timezone. 
+ status = nebula::time::Timezone::initializeGlobalTimezone(); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + + // Setup the signal handlers + status = setupSignalHandler(); + if (!status.ok()) { + LOG(ERROR) << status; + return EXIT_FAILURE; + } + + if (FLAGS_data_path.empty()) { + LOG(ERROR) << "Storage data path should not be empty"; + return EXIT_FAILURE; + } + + bool metaReady = false; + int32_t metaRet = EXIT_FAILURE; + std::unique_ptr metaThread = std::make_unique([&] { + SCOPE_EXIT { + stopAllDaemon(); + }; + nebula::HostAddr metaLocalhost{FLAGS_local_ip, FLAGS_meta_port}; + LOG(INFO) << "metaLocalhost = " << metaLocalhost; + auto peersRet = nebula::network::NetworkUtils::toHosts(FLAGS_meta_server_addrs); + if (!peersRet.ok()) { + LOG(ERROR) << "Can't get peers address, status:" << peersRet.status(); + return; + } + gMetaKVStore = initKV(peersRet.value(), metaLocalhost); + if (gMetaKVStore == nullptr) { + LOG(ERROR) << "Init kv failed!"; + return; + } + LOG(INFO) << "Start http service"; + auto helper = std::make_unique(); + auto pool = std::make_unique(); + pool->start(FLAGS_meta_http_thread_num, "http thread pool"); + auto webSvc = std::make_unique(); + status = initWebService(webSvc.get(), gMetaKVStore.get(), helper.get(), pool.get()); + if (!status.ok()) { + LOG(ERROR) << "Init web service failed: " << status; + return; + } + + { + nebula::meta::JobManager *jobMgr = nebula::meta::JobManager::getInstance(); + if (!jobMgr->init(gMetaKVStore.get())) { + LOG(ERROR) << "Init job manager failed"; + return; + } + } + + { + /** + * Only leader part needed. + */ + auto ret = gMetaKVStore->partLeader(nebula::kDefaultSpaceId, nebula::kDefaultPartId); + if (!nebula::ok(ret)) { + LOG(ERROR) << "Part leader get failed"; + return; + } + if (nebula::value(ret) == metaLocalhost) { + LOG(INFO) << "Check and init root user"; + if (!nebula::meta::RootUserMan::isUserExists(gMetaKVStore.get())) { + if (!nebula::meta::RootUserMan::initRootUser(gMetaKVStore.get())) { + LOG(ERROR) << "Init root user failed"; + return; + } + } + } + } + + auto handler = + std::make_shared(gMetaKVStore.get(), metaClusterId()); + LOG(INFO) << "The meta daemon starts on " << metaLocalhost; + try { + gMetaServer = std::make_unique(); + gMetaServer->setPort(FLAGS_meta_port); + gMetaServer->setIdleTimeout(std::chrono::seconds(0)); // No idle timeout on client connection + gMetaServer->setInterface(std::move(handler)); + if (FLAGS_enable_ssl || FLAGS_enable_meta_ssl) { + gMetaServer->setSSLConfig(nebula::sslContextConfig()); + } + metaReady = true; + gMetaServer->serve(); // Will wait until the server shuts down + } catch (const std::exception &e) { + LOG(ERROR) << "Exception thrown: " << e.what(); + return; + } + + LOG(INFO) << "The meta daemon stopped"; + metaRet = EXIT_SUCCESS; + return; + }); + + constexpr int metaWaitTimeoutInSec = 15; + constexpr int metaWaitIntervalInSec = 1; + int32_t metaWaitCount = 0; + + while (!metaReady && metaWaitIntervalInSec * metaWaitCount++ < metaWaitTimeoutInSec) { + sleep(metaWaitIntervalInSec); + } + + if (!metaReady) { + LOG(ERROR) << "Meta not ready in time"; + return EXIT_FAILURE; + } + + // start graph server + int32_t graphRet = EXIT_FAILURE; + std::unique_ptr graphThread = std::make_unique([&] { + SCOPE_EXIT { + stopAllDaemon(); + }; + nebula::HostAddr localhost{FLAGS_local_ip, FLAGS_port}; + LOG(INFO) << "Starting Graph HTTP Service"; + auto webSvc = std::make_unique(); + status = webSvc->start(FLAGS_ws_http_port, FLAGS_ws_h2_port); + if (!status.ok()) { +
LOG(WARNING) << "Failed to start graph HTTP service"; + return; + } + + if (FLAGS_num_netio_threads == 0) { + FLAGS_num_netio_threads = std::thread::hardware_concurrency(); + } + if (FLAGS_num_netio_threads <= 0) { + LOG(WARNING) << "Number of networking IO threads should be greater than zero"; + return; + } + LOG(INFO) << "Number of networking IO threads: " << FLAGS_num_netio_threads; + + if (FLAGS_num_worker_threads == 0) { + FLAGS_num_worker_threads = std::thread::hardware_concurrency(); + } + if (FLAGS_num_worker_threads <= 0) { + LOG(WARNING) << "Number of worker threads should be greater than zero"; + return; + } + LOG(INFO) << "Number of worker threads: " << FLAGS_num_worker_threads; + + auto threadFactory = std::make_shared("graph-netio"); + auto ioThreadPool = std::make_shared(FLAGS_num_netio_threads, + std::move(threadFactory)); + gServer = std::make_unique(); + gServer->setIOThreadPool(ioThreadPool); + + auto interface = std::make_shared(); + status = interface->init(ioThreadPool, localhost); + if (!status.ok()) { + LOG(ERROR) << status; + return; + } + + gServer->setPort(localhost.port); + gServer->setInterface(std::move(interface)); + gServer->setReusePort(FLAGS_reuse_port); + gServer->setIdleTimeout(std::chrono::seconds(FLAGS_client_idle_timeout_secs)); + gServer->setNumAcceptThreads(FLAGS_num_accept_threads); + gServer->setListenBacklog(FLAGS_listen_backlog); + if (FLAGS_enable_ssl || FLAGS_enable_graph_ssl) { + gServer->setSSLConfig(nebula::sslContextConfig()); + } + setupThreadManager(); + // Modify two blocking service + FLOG_INFO("Starting nebula-graphd on %s:%d\n", localhost.host.c_str(), localhost.port); + try { + gServer->serve(); // Blocking wait until shut down via gServer->stop() + } catch (const std::exception &e) { + FLOG_ERROR("Exception thrown while starting the RPC server: %s", e.what()); + return; + } + FLOG_INFO("nebula-graphd on %s:%d has been stopped", localhost.host.c_str(), localhost.port); + graphRet = EXIT_SUCCESS; + return; + }); + + int32_t storageRet = EXIT_FAILURE; + std ::unique_ptr storageThread = std::make_unique([&] { + SCOPE_EXIT { + stopAllDaemon(); + }; + HostAddr host(FLAGS_local_ip, FLAGS_storage_port); + LOG(INFO) << "host = " << host; + auto metaAddrsRet = nebula::network::NetworkUtils::toHosts(FLAGS_meta_server_addrs); + if (!metaAddrsRet.ok() || metaAddrsRet.value().empty()) { + LOG(ERROR) << "Can't get metaServer address, status:" << metaAddrsRet.status() + << ", FLAGS_meta_server_addrs:" << FLAGS_meta_server_addrs; + return; + } + + std::vector paths; + folly::split(",", FLAGS_data_path, paths, true); + std::transform(paths.begin(), paths.end(), paths.begin(), [](auto &p) { + return folly::trimWhitespace(p).str(); + }); + if (paths.empty()) { + LOG(ERROR) << "Bad data_path format:" << FLAGS_data_path; + return; + } + gStorageServer = std::make_unique( + host, metaAddrsRet.value(), paths, FLAGS_wal_path, FLAGS_listener_path); + if (!gStorageServer->start()) { + LOG(ERROR) << "Storage server start failed"; + gStorageServer->stop(); + return; + } + gStorageServer->waitUntilStop(); + LOG(INFO) << "The storage Daemon stopped"; + storageRet = EXIT_SUCCESS; + return; + }); + + metaThread->join(); + graphThread->join(); + storageThread->join(); + if (metaRet != EXIT_SUCCESS || graphRet != EXIT_SUCCESS || storageRet != EXIT_SUCCESS) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +Status setupSignalHandler() { + return nebula::SignalHandler::install( + {SIGINT, SIGTERM}, + [](nebula::SignalHandler::GeneralSignalInfo *info) { 
signalHandler(info->sig()); }); +} + +void stopAllDaemon() { + std::lock_guard guard(gServerGuard); + if (gServer) { + gServer->stop(); + gServer.reset(); + } + if (gStorageServer) { + gStorageServer->stop(); + gStorageServer.reset(); + } + if (gMetaServer) { + gMetaServer->stop(); + gMetaServer.reset(); + } + { + auto gJobMgr = nebula::meta::JobManager::getInstance(); + if (gJobMgr) { + gJobMgr->shutDown(); + } + } + if (gMetaKVStore) { + gMetaKVStore->stop(); + gMetaKVStore.reset(); + } +} + +void signalHandler(int sig) { + switch (sig) { + case SIGINT: + case SIGTERM: + FLOG_INFO("Signal %d(%s) received, stopping this server", sig, ::strsignal(sig)); + stopAllDaemon(); + break; + default: + FLOG_ERROR("Signal %d(%s) received but ignored", sig, ::strsignal(sig)); + } +} + +void printHelp(const char *prog) { + fprintf(stderr, "%s --flagfile \n", prog); +} + +void setupThreadManager() { + int numThreads = + FLAGS_num_worker_threads > 0 ? FLAGS_num_worker_threads : gServer->getNumIOWorkerThreads(); + std::shared_ptr threadManager( + PriorityThreadManager::newPriorityThreadManager(numThreads, false /*stats*/)); + threadManager->setNamePrefix("executor"); + threadManager->start(); + gServer->setThreadManager(threadManager); +} diff --git a/src/graph/service/GraphFlags.cpp b/src/graph/service/GraphFlags.cpp index e31439ff2c3..32fd80f2c14 100644 --- a/src/graph/service/GraphFlags.cpp +++ b/src/graph/service/GraphFlags.cpp @@ -52,7 +52,14 @@ DEFINE_int64(max_allowed_connections, DEFINE_bool(enable_optimizer, false, "Whether to enable optimizer"); +#ifndef BUILD_STANDALONE DEFINE_uint32(ft_request_retry_times, 3, "Retry times if fulltext request failed"); +DEFINE_bool(enable_client_white_list, true, "Turn on/off the client white list."); +DEFINE_string(client_white_list, + nebula::getOriginVersion() + ":2.5.0:2.5.1:2.6.0", + "A white list for different client versions, separate with colon."); + +#endif DEFINE_bool(accept_partial_success, false, "Whether to accept partial success, default false"); @@ -63,11 +70,6 @@ DEFINE_bool(disable_octal_escape_char, DEFINE_bool(enable_experimental_feature, false, "Whether to enable experimental feature"); -DEFINE_bool(enable_client_white_list, true, "Turn on/off the client white list."); -DEFINE_string(client_white_list, - nebula::getOriginVersion() + ":2.5.0:2.5.1:2.6.0", - "A white list for different client versions, separate with colon."); - DEFINE_int32(num_rows_to_check_memory, 1024, "number rows to check memory"); // Sanity-checking Flag Values diff --git a/src/kvstore/Listener.cpp b/src/kvstore/Listener.cpp index 8e5129dfb56..82a6e4e2a76 100644 --- a/src/kvstore/Listener.cpp +++ b/src/kvstore/Listener.cpp @@ -11,7 +11,7 @@ DEFINE_int32(listener_commit_interval_secs, 1, "Listener commit interval"); DEFINE_int32(listener_commit_batch_size, 1000, "Max batch size when listener commit"); -DEFINE_int32(ft_request_retry_times, 3, "Retry times if fulltext request failed"); +DEFINE_uint32(ft_request_retry_times, 3, "Retry times if fulltext request failed"); DEFINE_int32(ft_bulk_batch_size, 100, "Max batch size when bulk insert"); DEFINE_int32(listener_pursue_leader_threshold, 1000, "Catch up with the leader's threshold"); diff --git a/src/kvstore/plugins/elasticsearch/ESListener.cpp b/src/kvstore/plugins/elasticsearch/ESListener.cpp index 2d899d61a6c..75b7918eb40 100644 --- a/src/kvstore/plugins/elasticsearch/ESListener.cpp +++ b/src/kvstore/plugins/elasticsearch/ESListener.cpp @@ -8,7 +8,7 @@ #include "common/plugin/fulltext/elasticsearch/ESStorageAdapter.h" 
#include "common/utils/NebulaKeyUtils.h" -DECLARE_int32(ft_request_retry_times); +DECLARE_uint32(ft_request_retry_times); DECLARE_int32(ft_bulk_batch_size); namespace nebula { diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index b4b71d961a7..5f7d027e739 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -89,5 +89,11 @@ nebula_add_library( StorageServer.cpp ) + +nebula_add_library( + storage_local_server_obj OBJECT + GraphStorageLocalServer.cpp +) + nebula_add_subdirectory(stats) nebula_add_subdirectory(test) diff --git a/src/storage/GraphStorageLocalServer.cpp b/src/storage/GraphStorageLocalServer.cpp new file mode 100644 index 00000000000..9627d1c4588 --- /dev/null +++ b/src/storage/GraphStorageLocalServer.cpp @@ -0,0 +1,150 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "GraphStorageLocalServer.h" + +#include + +#include "common/base/Base.h" + +#define LOCAL_RETURN_FUTURE(threadManager, respType, callFunc) \ + auto promise = std::make_shared>(); \ + auto f = promise->getFuture(); \ + threadManager->add([&, promise] { \ + handler_->callFunc(request).thenValue( \ + [promise](respType&& resp) { promise->setValue(std::move(resp)); }); \ + }); \ + return f; + +namespace nebula::storage { + +std::mutex mutex_; +std::shared_ptr instance_ = nullptr; + +void GraphStorageLocalServer::setThreadManager( + std::shared_ptr threadManager) { + // lock? + threadManager_ = threadManager; +} + +void GraphStorageLocalServer::setInterface(std::shared_ptr handler) { + handler_ = handler; +} + +void GraphStorageLocalServer::serve() { + if (serving_) { + LOG(WARNING) << "Server already serving"; + return; + } + // do nothing, wait stop + serving_ = true; + sem_.wait(); +} + +void GraphStorageLocalServer::stop() { + if (!serving_) { + LOG(WARNING) << "Can't stop server not serving"; + return; + } + sem_.signal(); + serving_ = false; +} + +folly::Future GraphStorageLocalServer::future_getNeighbors( + const cpp2::GetNeighborsRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::GetNeighborsResponse, future_getNeighbors); +} + +folly::Future GraphStorageLocalServer::future_addVertices( + const cpp2::AddVerticesRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_addVertices); +} + +folly::Future GraphStorageLocalServer::future_chainAddEdges( + const cpp2::AddEdgesRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_chainAddEdges); +} + +folly::Future GraphStorageLocalServer::future_addEdges( + const cpp2::AddEdgesRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_addEdges); +} + +folly::Future GraphStorageLocalServer::future_getProps( + const cpp2::GetPropRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::GetPropResponse, future_getProps); +} + +folly::Future GraphStorageLocalServer::future_deleteEdges( + const cpp2::DeleteEdgesRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_deleteEdges); +} + +folly::Future GraphStorageLocalServer::future_deleteVertices( + const cpp2::DeleteVerticesRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_deleteVertices); +} + +folly::Future GraphStorageLocalServer::future_deleteTags( + const cpp2::DeleteTagsRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_deleteTags); +} + +folly::Future 
GraphStorageLocalServer::future_updateVertex( + const cpp2::UpdateVertexRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::UpdateResponse, future_updateVertex); +} + +folly::Future GraphStorageLocalServer::future_chainUpdateEdge( + const cpp2::UpdateEdgeRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::UpdateResponse, future_chainUpdateEdge); +} + +folly::Future GraphStorageLocalServer::future_updateEdge( + const cpp2::UpdateEdgeRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::UpdateResponse, future_updateEdge); +} + +folly::Future GraphStorageLocalServer::future_getUUID( + const cpp2::GetUUIDReq& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::GetUUIDResp, future_getUUID); +} + +folly::Future GraphStorageLocalServer::future_lookupIndex( + const cpp2::LookupIndexRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::LookupIndexResp, future_lookupIndex); +} + +folly::Future GraphStorageLocalServer::future_lookupAndTraverse( + const cpp2::LookupAndTraverseRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::GetNeighborsResponse, future_lookupAndTraverse); +} + +folly::Future GraphStorageLocalServer::future_scanVertex( + const cpp2::ScanVertexRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ScanResponse, future_scanVertex); +} + +folly::Future GraphStorageLocalServer::future_scanEdge( + const cpp2::ScanEdgeRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ScanResponse, future_scanEdge); +} + +folly::Future GraphStorageLocalServer::future_get( + const cpp2::KVGetRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::KVGetResponse, future_get); +} + +folly::Future GraphStorageLocalServer::future_put( + const cpp2::KVPutRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_put); +} + +folly::Future GraphStorageLocalServer::future_remove( + const cpp2::KVRemoveRequest& request) { + LOCAL_RETURN_FUTURE(threadManager_, cpp2::ExecResponse, future_remove); +} + +} // namespace nebula::storage diff --git a/src/storage/GraphStorageLocalServer.h b/src/storage/GraphStorageLocalServer.h new file mode 100644 index 00000000000..10e27463f7c --- /dev/null +++ b/src/storage/GraphStorageLocalServer.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#pragma once + +#include +#include + +#include + +#include "common/base/Base.h" +#include "folly/fibers/Semaphore.h" +#include "interface/gen-cpp2/GraphStorageServiceAsyncClient.h" +#include "storage/GraphStorageServiceHandler.h" + +namespace nebula::storage { +class GraphStorageLocalServer final : public nebula::cpp::NonCopyable, + public nebula::cpp::NonMovable { + public: + static std::shared_ptr getInstance() { + static std::shared_ptr instance{new GraphStorageLocalServer()}; + return instance; + } + void setThreadManager(std::shared_ptr threadManager); + void setInterface(std::shared_ptr iface); + void stop(); + void serve(); + + public: + folly::Future future_getNeighbors( + const cpp2::GetNeighborsRequest& request); + folly::Future future_addVertices(const cpp2::AddVerticesRequest& request); + folly::Future future_chainAddEdges(const cpp2::AddEdgesRequest& request); + folly::Future future_addEdges(const cpp2::AddEdgesRequest& request); + folly::Future future_getProps(const cpp2::GetPropRequest& request); + folly::Future future_deleteEdges(const cpp2::DeleteEdgesRequest& request); + folly::Future future_deleteVertices( + const cpp2::DeleteVerticesRequest& request); + folly::Future future_deleteTags(const cpp2::DeleteTagsRequest& request); + folly::Future future_updateVertex(const cpp2::UpdateVertexRequest& request); + folly::Future future_chainUpdateEdge( + const cpp2::UpdateEdgeRequest& request); + folly::Future future_updateEdge(const cpp2::UpdateEdgeRequest& request); + folly::Future future_getUUID(const cpp2::GetUUIDReq& request); + folly::Future future_lookupIndex(const cpp2::LookupIndexRequest& request); + folly::Future future_lookupAndTraverse( + const cpp2::LookupAndTraverseRequest& request); + folly::Future future_scanVertex(const cpp2::ScanVertexRequest& request); + folly::Future future_scanEdge(const cpp2::ScanEdgeRequest& request); + folly::Future future_get(const cpp2::KVGetRequest& request); + folly::Future future_put(const cpp2::KVPutRequest& request); + folly::Future future_remove(const cpp2::KVRemoveRequest& request); + + private: + GraphStorageLocalServer() = default; + + private: + std::shared_ptr threadManager_; + std::shared_ptr handler_; + folly::fibers::Semaphore sem_{0}; + static std::mutex mutex_; + bool serving_ = {false}; +}; +} // namespace nebula::storage diff --git a/src/storage/StorageServer.cpp b/src/storage/StorageServer.cpp index f29fdb490a1..66b697031c3 100644 --- a/src/storage/StorageServer.cpp +++ b/src/storage/StorageServer.cpp @@ -21,6 +21,7 @@ #include "kvstore/RocksEngine.h" #include "storage/BaseProcessor.h" #include "storage/CompactionFilter.h" +#include "storage/GraphStorageLocalServer.h" #include "storage/GraphStorageServiceHandler.h" #include "storage/InternalStorageServiceHandler.h" #include "storage/StorageAdminServiceHandler.h" @@ -35,12 +36,18 @@ #include "webservice/Router.h" #include "webservice/WebService.h" +#ifndef BUILD_STANDALONE DEFINE_int32(port, 44500, "Storage daemon listening port"); -DEFINE_int32(num_io_threads, 16, "Number of IO threads"); DEFINE_int32(num_worker_threads, 32, "Number of workers"); -DEFINE_int32(storage_http_thread_num, 3, "Number of storage daemon's http thread"); DEFINE_bool(local_config, false, "meta client will not retrieve latest configuration from meta"); +#else +DEFINE_int32(storage_port, 44501, "Storage daemon listening port"); +DEFINE_int32(storage_num_worker_threads, 32, "Number of workers"); +DECLARE_bool(local_config); +#endif DEFINE_bool(storage_kv_mode, false, "True for kv mode"); 
+DEFINE_int32(num_io_threads, 16, "Number of IO threads"); +DEFINE_int32(storage_http_thread_num, 3, "Number of storage daemon's http thread"); namespace nebula { namespace storage { @@ -117,7 +124,11 @@ bool StorageServer::initWebService() { return new storage::StorageHttpPropertyHandler(schemaMan_.get(), kvstore_.get()); }); +#ifndef BUILD_STANDALONE auto status = webSvc_->start(); +#else + auto status = webSvc_->start(FLAGS_ws_storage_http_port, FLAGS_ws_storage_h2_port); +#endif return status.ok(); } @@ -148,8 +159,13 @@ int32_t StorageServer::getAdminStoreSeqId() { bool StorageServer::start() { ioThreadPool_ = std::make_shared(FLAGS_num_io_threads); +#ifndef BUILD_STANDALONE + const int32_t numWorkerThreads = FLAGS_num_worker_threads; +#else + const int32_t numWorkerThreads = FLAGS_storage_num_worker_threads; +#endif workers_ = apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager( - FLAGS_num_worker_threads); + numWorkerThreads); workers_->setNamePrefix("executor"); workers_->start(); @@ -222,17 +238,21 @@ bool StorageServer::start() { storageThread_.reset(new std::thread([this] { try { auto handler = std::make_shared(env_.get()); +#ifndef BUILD_STANDALONE storageServer_ = std::make_unique(); storageServer_->setPort(FLAGS_port); storageServer_->setIdleTimeout(std::chrono::seconds(0)); storageServer_->setIOThreadPool(ioThreadPool_); - storageServer_->setThreadManager(workers_); storageServer_->setStopWorkersOnStopListening(false); - storageServer_->setInterface(std::move(handler)); if (FLAGS_enable_ssl) { storageServer_->setSSLConfig(nebula::sslContextConfig()); } +#else + storageServer_ = GraphStorageLocalServer::getInstance(); +#endif + storageServer_->setThreadManager(workers_); + storageServer_->setInterface(std::move(handler)); ServiceStatus expected = STATUS_UNINITIALIZED; if (!storageSvcStatus_.compare_exchange_strong(expected, STATUS_RUNNING)) { LOG(ERROR) << "Impossible! How could it happen!"; diff --git a/src/storage/StorageServer.h b/src/storage/StorageServer.h index 27795c5eaf5..2f907263ef9 100644 --- a/src/storage/StorageServer.h +++ b/src/storage/StorageServer.h @@ -15,6 +15,7 @@ #include "common/meta/SchemaManager.h" #include "kvstore/NebulaStore.h" #include "storage/CommonUtils.h" +#include "storage/GraphStorageLocalServer.h" #include "storage/admin/AdminTaskManager.h" #include "storage/transaction/TransactionManager.h" @@ -64,7 +65,11 @@ class StorageServer final { std::atomic storageSvcStatus_{STATUS_UNINITIALIZED}; std::atomic adminSvcStatus_{STATUS_UNINITIALIZED}; +#ifndef BUILD_STANDALONE std::unique_ptr storageServer_; +#else + std::shared_ptr storageServer_; +#endif std::unique_ptr adminServer_; std::unique_ptr internalStorageThread_; diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 9c0e31c33f0..efc6bdb4d52 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -1,5 +1,9 @@ +# note: the standalone version doesn't have dependent test tools for now.
+ +if(NOT ENABLE_STANDALONE_VERSION) nebula_add_subdirectory(storage-perf) nebula_add_subdirectory(simple-kv-verify) +endif() nebula_add_subdirectory(meta-dump) nebula_add_subdirectory(db-dump) nebula_add_subdirectory(db-upgrade) diff --git a/src/webservice/WebService.cpp b/src/webservice/WebService.cpp index 196734a8b13..393bc47fe4a 100644 --- a/src/webservice/WebService.cpp +++ b/src/webservice/WebService.cpp @@ -22,6 +22,10 @@ DEFINE_int32(ws_h2_port, 11002, "Port to listen on with HTTP/2 protocol"); DEFINE_string(ws_ip, "0.0.0.0", "IP/Hostname to bind to"); DEFINE_int32(ws_threads, 4, "Number of threads for the web service."); +#ifdef BUILD_STANDALONE +DEFINE_int32(ws_storage_threads, 4, "Number of threads for the web service."); +#endif + namespace nebula { namespace { @@ -57,7 +61,7 @@ WebService::~WebService() { wsThread_->join(); } -Status WebService::start() { +Status WebService::start(uint16_t httpPort, uint16_t h2Port) { if (started_) { LOG(INFO) << "Web service has been started."; return Status::OK(); @@ -87,8 +91,8 @@ Status WebService::start() { started_ = true; std::vector ips = { - {SocketAddress(FLAGS_ws_ip, FLAGS_ws_http_port, true), HTTPServer::Protocol::HTTP}, - {SocketAddress(FLAGS_ws_ip, FLAGS_ws_h2_port, true), HTTPServer::Protocol::HTTP2}, + {SocketAddress(FLAGS_ws_ip, httpPort, true), HTTPServer::Protocol::HTTP}, + {SocketAddress(FLAGS_ws_ip, h2Port, true), HTTPServer::Protocol::HTTP2}, }; CHECK_GT(FLAGS_ws_threads, 0) << "The number of webservice threads must be greater than zero"; diff --git a/src/webservice/WebService.h b/src/webservice/WebService.h index bd34004abd4..a6b760f486c 100644 --- a/src/webservice/WebService.h +++ b/src/webservice/WebService.h @@ -13,6 +13,12 @@ DECLARE_int32(ws_h2_port); DECLARE_string(ws_ip); DECLARE_int32(ws_threads); +#ifdef BUILD_STANDALONE +DECLARE_int32(ws_storage_http_port); +DECLARE_int32(ws_storage_h2_port); +DECLARE_int32(ws_storage_threads); +#endif + namespace proxygen { class HTTPServer; class RequestHandler; @@ -41,7 +47,8 @@ class WebService final { // Two ports would be bound, one for HTTP, another one for HTTP2. // If FLAGS_ws_http_port or FLAGS_ws_h2_port is zero, an ephemeral port // would be assigned and set back to the gflag, respectively. 
- NG_MUST_USE_RESULT Status start(); + NG_MUST_USE_RESULT Status start(uint16_t httpPort = FLAGS_ws_http_port, + uint16_t h2Port = FLAGS_ws_h2_port); // Check whether web service is started bool started() const { From 8cc544c150afd2e44ae681179d3fe619037fb3cf Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 27 Dec 2021 19:39:19 +0800 Subject: [PATCH 5/8] Fix the issue that the query results will still contain the expired edges if we use go reversely (#3536) * Fix the issue that the query results will still contain the expired edges if we use go reversely * fix the code lint error Co-authored-by: Doodle <13706157+critical27@users.noreply.github.com> --- src/storage/query/QueryBaseProcessor-inl.h | 2 +- src/storage/test/GetNeighborsTest.cpp | 46 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/storage/query/QueryBaseProcessor-inl.h b/src/storage/query/QueryBaseProcessor-inl.h index d56e7ed300b..1c2c1594d39 100644 --- a/src/storage/query/QueryBaseProcessor-inl.h +++ b/src/storage/query/QueryBaseProcessor-inl.h @@ -176,7 +176,7 @@ void QueryBaseProcessor::buildEdgeTTLInfo() { auto ttlInfo = edgeSchema->getTTLInfo(); if (ttlInfo.ok()) { VLOG(2) << "Add ttl col " << ttlInfo.value().first << " of edge " << edgeType; - edgeContext_.ttlInfo_.emplace(edgeType, std::move(ttlInfo).value()); + edgeContext_.ttlInfo_.emplace(std::abs(edgeType), std::move(ttlInfo).value()); } } } diff --git a/src/storage/test/GetNeighborsTest.cpp b/src/storage/test/GetNeighborsTest.cpp index 95c4c455327..3bcd3115442 100644 --- a/src/storage/test/GetNeighborsTest.cpp +++ b/src/storage/test/GetNeighborsTest.cpp @@ -933,6 +933,7 @@ TEST(GetNeighborsTest, TtlTest) { auto threadPool = std::make_shared(4); TagID player = 1; + TagID team = 2; EdgeType serve = 101; { @@ -954,6 +955,24 @@ TEST(GetNeighborsTest, TtlTest) { // vId, stat, player, serve, expr QueryTestUtils::checkResponse(*resp.vertices_ref(), vertices, over, tags, edges, 1, 5); } + { + LOG(INFO) << "InEdgeReturnAllProperty"; + std::vector vertices = {"Spurs"}; + std::vector over = {-serve}; + std::vector>> tags; + std::vector>> edges; + tags.emplace_back(team, std::vector{"name"}); + edges.emplace_back(-serve, std::vector{"playerName", "startYear", "teamCareer"}); + auto req = QueryTestUtils::buildRequest(totalParts, vertices, over, tags, edges); + + auto* processor = GetNeighborsProcessor::instance(env, nullptr, threadPool.get()); + auto fut = processor->getFuture(); + processor->process(req); + auto resp = std::move(fut).get(); + ASSERT_EQ(0, (*resp.result_ref()).failed_parts.size()); + // vId, stat, team, - serve, expr + QueryTestUtils::checkResponse(*resp.vertices_ref(), vertices, over, tags, edges, 1, 5); + } { LOG(INFO) << "GoFromPlayerOverAll"; std::vector vertices = {"Tim Duncan"}; @@ -999,6 +1018,33 @@ TEST(GetNeighborsTest, TtlTest) { ASSERT_EQ(Value::Type::__EMPTY__, (*resp.vertices_ref()).rows[0].values[3].type()); ASSERT_EQ(Value::Type::__EMPTY__, (*resp.vertices_ref()).rows[0].values[4].type()); } + { + LOG(INFO) << "InEdgeReturnAllProperty"; + std::vector vertices = {"Spurs"}; + std::vector over = {-serve}; + std::vector>> tags; + std::vector>> edges; + tags.emplace_back(team, std::vector{"name"}); + edges.emplace_back(-serve, std::vector{"playerName", "startYear", "teamCareer"}); + auto req = QueryTestUtils::buildRequest(totalParts, vertices, over, tags, edges); + + auto* processor = GetNeighborsProcessor::instance(env, nullptr, threadPool.get()); + auto fut = processor->getFuture(); + processor->process(req); + 
auto resp = std::move(fut).get(); + + ASSERT_EQ(0, (*resp.result_ref()).failed_parts.size()); + ASSERT_EQ(1, (*resp.vertices_ref()).rows.size()); + // vId, stat, team, - serve, expr + ASSERT_EQ(5, (*resp.vertices_ref()).rows[0].values.size()); + ASSERT_EQ("Spurs", (*resp.vertices_ref()).rows[0].values[0].getStr()); + ASSERT_EQ(Value::Type::__EMPTY__, (*resp.vertices_ref()).rows[0].values[1].type()); + // team still exists (team hasn't set ttl) + ASSERT_EQ(Value::Type::LIST, (*resp.vertices_ref()).rows[0].values[2].type()); + // - serve expired + ASSERT_EQ(Value::Type::__EMPTY__, (*resp.vertices_ref()).rows[0].values[3].type()); + ASSERT_EQ(Value::Type::__EMPTY__, (*resp.vertices_ref()).rows[0].values[4].type()); + } { LOG(INFO) << "GoFromPlayerOverAll"; std::vector vertices = {"Tim Duncan"}; From e2b66dd7b42a8330ffcde0f52babf1f5aaaa7b26 Mon Sep 17 00:00:00 2001 From: "jie.wang" <38901892+jievince@users.noreply.github.com> Date: Mon, 27 Dec 2021 19:57:39 +0800 Subject: [PATCH 6/8] round the float/double when insert it to integer column (#3572) --- src/codec/RowWriterV2.cpp | 18 ++++--- src/graph/executor/admin/SpaceExecutor.cpp | 2 +- src/graph/service/GraphService.cpp | 6 ++- src/graph/service/QueryInstance.cpp | 10 ++-- tests/tck/features/bugfix/RoundFloat.feature | 55 ++++++++++++++++++++ 5 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 tests/tck/features/bugfix/RoundFloat.feature diff --git a/src/codec/RowWriterV2.cpp b/src/codec/RowWriterV2.cpp index d1c4cbf0613..639b4854965 100644 --- a/src/codec/RowWriterV2.cpp +++ b/src/codec/RowWriterV2.cpp @@ -5,6 +5,8 @@ #include "codec/RowWriterV2.h" +#include + #include "common/time/TimeUtils.h" #include "common/time/WallClock.h" #include "common/utils/DefaultValueContext.h" @@ -286,7 +288,7 @@ WriteResult RowWriterV2::write(ssize_t index, float v) noexcept { if (v > std::numeric_limits::max() || v < std::numeric_limits::min()) { return WriteResult::OUT_OF_RANGE; } - int8_t iv = v; + int8_t iv = std::round(v); buf_[offset] = iv; break; } @@ -294,7 +296,7 @@ WriteResult RowWriterV2::write(ssize_t index, float v) noexcept { if (v > std::numeric_limits::max() || v < std::numeric_limits::min()) { return WriteResult::OUT_OF_RANGE; } - int16_t iv = v; + int16_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int16_t)); break; } @@ -303,7 +305,7 @@ WriteResult RowWriterV2::write(ssize_t index, float v) noexcept { v < static_cast(std::numeric_limits::min())) { return WriteResult::OUT_OF_RANGE; } - int32_t iv = v; + int32_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int32_t)); break; } @@ -312,7 +314,7 @@ WriteResult RowWriterV2::write(ssize_t index, float v) noexcept { v < static_cast(std::numeric_limits::min())) { return WriteResult::OUT_OF_RANGE; } - int64_t iv = v; + int64_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int64_t)); break; } @@ -343,7 +345,7 @@ WriteResult RowWriterV2::write(ssize_t index, double v) noexcept { if (v > std::numeric_limits::max() || v < std::numeric_limits::min()) { return WriteResult::OUT_OF_RANGE; } - int8_t iv = v; + int8_t iv = std::round(v); buf_[offset] = iv; break; } @@ -351,7 +353,7 @@ WriteResult RowWriterV2::write(ssize_t index, double v) noexcept { if (v > std::numeric_limits::max() || v < std::numeric_limits::min()) { return WriteResult::OUT_OF_RANGE; } - int16_t iv = v; + int16_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int16_t)); break; } @@ -359,7 +361,7 @@ WriteResult 
RowWriterV2::write(ssize_t index, double v) noexcept { if (v > std::numeric_limits::max() || v < std::numeric_limits::min()) { return WriteResult::OUT_OF_RANGE; } - int32_t iv = v; + int32_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int32_t)); break; } @@ -368,7 +370,7 @@ WriteResult RowWriterV2::write(ssize_t index, double v) noexcept { v < static_cast(std::numeric_limits::min())) { return WriteResult::OUT_OF_RANGE; } - int64_t iv = v; + int64_t iv = std::round(v); memcpy(&buf_[offset], reinterpret_cast(&iv), sizeof(int64_t)); break; } diff --git a/src/graph/executor/admin/SpaceExecutor.cpp b/src/graph/executor/admin/SpaceExecutor.cpp index 9591b734b32..e7f9cc52a62 100644 --- a/src/graph/executor/admin/SpaceExecutor.cpp +++ b/src/graph/executor/admin/SpaceExecutor.cpp @@ -167,7 +167,7 @@ folly::Future DropSpaceExecutor::execute() { } void DropSpaceExecutor::unRegisterSpaceLevelMetrics(const std::string &spaceName) { - if (FLAGS_enable_space_level_metrics) { + if (FLAGS_enable_space_level_metrics && spaceName != "") { stats::StatsManager::removeCounterWithLabels(kNumQueries, {{"space", spaceName}}); stats::StatsManager::removeCounterWithLabels(kNumSlowQueries, {{"space", spaceName}}); stats::StatsManager::removeCounterWithLabels(kNumQueryErrors, {{"space", spaceName}}); diff --git a/src/graph/service/GraphService.cpp b/src/graph/service/GraphService.cpp index 0417399c1fc..73f903526e6 100644 --- a/src/graph/service/GraphService.cpp +++ b/src/graph/service/GraphService.cpp @@ -161,8 +161,10 @@ folly::Future GraphService::future_executeWithParameter( return ctx->finish(); } stats::StatsManager::addValue(kNumQueries); - stats::StatsManager::addValue( - stats::StatsManager::counterWithLabels(kNumQueries, {{"space", sessionPtr->space().name}})); + if (FLAGS_enable_space_level_metrics && sessionPtr->space().name != "") { + stats::StatsManager::addValue(stats::StatsManager::counterWithLabels( + kNumQueries, {{"space", sessionPtr->space().name}})); + } ctx->setSession(std::move(sessionPtr)); ctx->setParameterMap(parameterMap); queryEngine_->execute(std::move(ctx)); diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index 4b42a5c9a7f..09d6769f90d 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -163,8 +163,10 @@ void QueryInstance::onError(Status status) { auto latency = rctx->duration().elapsedInUSec(); rctx->resp().latencyInUs = latency; stats::StatsManager::addValue(kNumQueryErrors); - stats::StatsManager::addValue( - stats::StatsManager::counterWithLabels(kNumQueryErrors, {{"space", spaceName}})); + if (FLAGS_enable_space_level_metrics && spaceName != "") { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumQueryErrors, {{"space", spaceName}})); + } addSlowQueryStats(latency, spaceName); rctx->session()->deleteQuery(qctx_.get()); rctx->finish(); @@ -173,14 +175,14 @@ void QueryInstance::onError(Status status) { void QueryInstance::addSlowQueryStats(uint64_t latency, const std::string &spaceName) const { stats::StatsManager::addValue(kQueryLatencyUs, latency); - if (FLAGS_enable_space_level_metrics) { + if (FLAGS_enable_space_level_metrics && spaceName != "") { stats::StatsManager::addValue( stats::StatsManager::histoWithLabels(kQueryLatencyUs, {{"space", spaceName}}), latency); } if (latency > static_cast(FLAGS_slow_query_threshold_us)) { stats::StatsManager::addValue(kNumSlowQueries); stats::StatsManager::addValue(kSlowQueryLatencyUs, latency); - if 
(FLAGS_enable_space_level_metrics) { + if (FLAGS_enable_space_level_metrics && spaceName != "") { stats::StatsManager::addValue( stats::StatsManager::counterWithLabels(kNumSlowQueries, {{"space", spaceName}})); stats::StatsManager::addValue( diff --git a/tests/tck/features/bugfix/RoundFloat.feature b/tests/tck/features/bugfix/RoundFloat.feature new file mode 100644 index 00000000000..307e42a8ac3 --- /dev/null +++ b/tests/tck/features/bugfix/RoundFloat.feature @@ -0,0 +1,55 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License. +Feature: Round the float/double when inserting them into an integer column + + # issue https://github.com/vesoft-inc/nebula/issues/3473 + Scenario: Insert float/double into an integer column + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | FIXED_STRING(30) | + | charset | utf8 | + | collate | utf8_bin | + When executing query: + """ + create tag test(a int32); + """ + Then the execution should be successful + When try to execute query: + """ + INSERT VERTEX test(a) VALUES '101':(3.2); + """ + Then the execution should be successful + When executing query: + """ + INSERT VERTEX test(a) VALUES '102':(3.8); + """ + Then the execution should be successful + When executing query: + """ + INSERT VERTEX test(a) VALUES '103':(-3.2); + """ + Then the execution should be successful + When executing query: + """ + INSERT VERTEX test(a) VALUES '104':(-3.8); + """ + Then the execution should be successful + When executing query: + """ + INSERT VERTEX test(a) VALUES '104':(2147483647.1); + """ + Then an ExecutionError should be raised at runtime: Storage Error: Out of range value. + When executing query: + """ + FETCH PROP ON test '101', '102', '103', '104' YIELD test.a; + """ + Then the result should be, in any order, with relax comparison: + | test.a | + | 3 | + | 4 | + | -3 | + | -4 | + Then drop the used space From f42afac111adf6a1dacdc48f8187e9c557b5e314 Mon Sep 17 00:00:00 2001 From: "yuehua.jia" <3423893+jiayuehua@users.noreply.github.com> Date: Mon, 27 Dec 2021 20:35:36 +0800 Subject: [PATCH 7/8] Support logrotate (#3541) * add cmake git hooks options * githooks option * include what you use * make name follow convention * linker support for lld, the fastest linker; if CI installs lld for a pull request, lld will be set as the default linker. * log rotate * log rotate * log rotate setuplogging format * log rotate * fix glog_fatal log destination * log rotate * clang-format ExprVisitor --- conf/nebula-graphd.conf.default | 3 ++- conf/nebula-graphd.conf.production | 2 ++ conf/nebula-metad.conf.default | 2 ++ conf/nebula-metad.conf.production | 2 ++ conf/nebula-storaged-listener.conf.production | 2 ++ conf/nebula-storaged.conf.default | 2 ++ conf/nebula-storaged.conf.production | 2 ++ src/daemons/GraphDaemon.cpp | 4 ++-- src/daemons/MetaDaemon.cpp | 4 ++-- src/daemons/SetupLogging.cpp | 22 ++++++++++++++----- src/daemons/SetupLogging.h | 18 +++++++++++++++ src/daemons/StorageDaemon.cpp | 4 ++-- 12 files changed, 54 insertions(+), 13 deletions(-) create mode 100644 src/daemons/SetupLogging.h diff --git a/conf/nebula-graphd.conf.default b/conf/nebula-graphd.conf.default index da2efa27c98..65d9d3604fb 100644 --- a/conf/nebula-graphd.conf.default +++ b/conf/nebula-graphd.conf.default @@ -28,7 +28,8 @@ --stderr_log_file=graphd-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles.
The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 - +# Whether logging files' names contain a timestamp. +--timestamp_in_logfile_name=true ########## query ########## # Whether to treat partial success as an error. # This flag is only used for Read-only access, and Modify access always treats partial success as an error. diff --git a/conf/nebula-graphd.conf.production b/conf/nebula-graphd.conf.production index 67798fa6942..0d178d03a56 100644 --- a/conf/nebula-graphd.conf.production +++ b/conf/nebula-graphd.conf.production @@ -26,6 +26,8 @@ --stderr_log_file=graphd-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp. +--timestamp_in_logfile_name=true ########## query ########## # Whether to treat partial success as an error. diff --git a/conf/nebula-metad.conf.default b/conf/nebula-metad.conf.default index 332e31911c7..89e537b9a57 100644 --- a/conf/nebula-metad.conf.default +++ b/conf/nebula-metad.conf.default @@ -20,6 +20,8 @@ --stderr_log_file=metad-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp. If using logrotate to rotate logging files, this should be set to true. +--timestamp_in_logfile_name=true ########## networking ########## # Comma separated Meta Server addresses diff --git a/conf/nebula-metad.conf.production b/conf/nebula-metad.conf.production index a7fb729315b..870ba6318c2 100644 --- a/conf/nebula-metad.conf.production +++ b/conf/nebula-metad.conf.production @@ -20,6 +20,8 @@ --stderr_log_file=metad-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp. +--timestamp_in_logfile_name=true ########## networking ########## # Comma separated Meta Server addresses diff --git a/conf/nebula-storaged-listener.conf.production b/conf/nebula-storaged-listener.conf.production index da6095117fb..6dff785502b 100644 --- a/conf/nebula-storaged-listener.conf.production +++ b/conf/nebula-storaged-listener.conf.production @@ -23,6 +23,8 @@ --stderr_log_file=storaged-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp. +--timestamp_in_logfile_name=true ########## networking ########## # Meta server address diff --git a/conf/nebula-storaged.conf.default b/conf/nebula-storaged.conf.default index 59cd0f80cd9..b5e9019a52b 100644 --- a/conf/nebula-storaged.conf.default +++ b/conf/nebula-storaged.conf.default @@ -22,6 +22,8 @@ --stderr_log_file=storaged-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp.
+--timestamp_in_logfile_name=true ########## networking ########## # Comma separated Meta server addresses diff --git a/conf/nebula-storaged.conf.production b/conf/nebula-storaged.conf.production index 688fa36910e..18a71133731 100644 --- a/conf/nebula-storaged.conf.production +++ b/conf/nebula-storaged.conf.production @@ -22,6 +22,8 @@ --stderr_log_file=storaged-stderr.log # Copy log messages at or above this level to stderr in addition to logfiles. The numbers of severity levels INFO, WARNING, ERROR, and FATAL are 0, 1, 2, and 3, respectively. --stderrthreshold=2 +# Whether logging files' names contain a timestamp. +--timestamp_in_logfile_name=true ########## networking ########## # Comma separated Meta server addresses diff --git a/src/daemons/GraphDaemon.cpp b/src/daemons/GraphDaemon.cpp index 87efa2107be..c5acae262e5 100644 --- a/src/daemons/GraphDaemon.cpp +++ b/src/daemons/GraphDaemon.cpp @@ -16,6 +16,7 @@ #include "common/process/ProcessUtils.h" #include "common/ssl/SSLConfig.h" #include "common/time/TimezoneInfo.h" +#include "daemons/SetupLogging.h" #include "graph/service/GraphFlags.h" #include "graph/service/GraphService.h" #include "graph/stats/GraphStats.h" @@ -33,7 +34,6 @@ static std::unique_ptr gServer; static void signalHandler(int sig); static Status setupSignalHandler(); -extern Status setupLogging(); static void printHelp(const char *prog); static void setupThreadManager(); #if defined(__x86_64__) @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) { } // Setup logging - auto status = setupLogging(); + auto status = setupLogging(argv[0]); if (!status.ok()) { LOG(ERROR) << status; return EXIT_FAILURE; diff --git a/src/daemons/MetaDaemon.cpp b/src/daemons/MetaDaemon.cpp index 11960bc7794..211a88d17d0 100644 --- a/src/daemons/MetaDaemon.cpp +++ b/src/daemons/MetaDaemon.cpp @@ -18,6 +18,7 @@ #include "common/thread/GenericThreadPool.h" #include "common/time/TimezoneInfo.h" #include "common/utils/MetaKeyUtils.h" +#include "daemons/SetupLogging.h" #include "kvstore/NebulaStore.h" #include "kvstore/PartManager.h" #include "meta/ActiveHostsMan.h" @@ -58,7 +59,6 @@ static std::unique_ptr gKVStore; static void signalHandler(int sig); static void waitForStop(); static Status setupSignalHandler(); -extern Status setupLogging(); #if defined(__x86_64__) extern Status setupBreakpad(); #endif @@ -71,7 +71,7 @@ int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, false); // Setup logging - auto status = setupLogging(); + auto status = setupLogging(argv[0]); if (!status.ok()) { LOG(ERROR) << status; return EXIT_FAILURE; diff --git a/src/daemons/SetupLogging.cpp b/src/daemons/SetupLogging.cpp index 2b34a07e440..18441d3be9e 100644 --- a/src/daemons/SetupLogging.cpp +++ b/src/daemons/SetupLogging.cpp @@ -3,10 +3,15 @@ * This source code is licensed under Apache 2.0 License.
*/ +#include "daemons/SetupLogging.h" + +#include + +#include + #include "common/base/Base.h" #include "common/base/Status.h" #include "common/fs/FileUtils.h" - DECLARE_string(log_dir); DEFINE_bool(redirect_stdout, true, "Whether to redirect stdout and stderr to separate files"); @@ -16,12 +21,17 @@ DEFINE_string(stderr_log_file, "stderr.log", "Destination filename of stderr"); using nebula::Status; using nebula::fs::FileUtils; -Status setupLogging() { +Status setupLogging(const std::string &exe) { // If the log directory does not exist, try to create - if (!FileUtils::exist(FLAGS_log_dir)) { - if (!FileUtils::makeDir(FLAGS_log_dir)) { - return Status::Error("Failed to create log directory `%s'", FLAGS_log_dir.c_str()); - } + if (!FileUtils::exist(FLAGS_log_dir) && !FileUtils::makeDir(FLAGS_log_dir)) { + return Status::Error("Failed to create log directory `%s'", FLAGS_log_dir.c_str()); + } + if (!FLAGS_timestamp_in_logfile_name) { + google::SetLogDestination(google::GLOG_INFO, (FLAGS_log_dir + '/' + exe + ".INFO").c_str()); + google::SetLogDestination(google::GLOG_WARNING, + (FLAGS_log_dir + '/' + exe + ".WARNING").c_str()); + google::SetLogDestination(google::GLOG_ERROR, (FLAGS_log_dir + '/' + exe + ".ERROR").c_str()); + google::SetLogDestination(google::GLOG_FATAL, (FLAGS_log_dir + '/' + exe + ".FATAL").c_str()); } if (!FLAGS_redirect_stdout) { diff --git a/src/daemons/SetupLogging.h b/src/daemons/SetupLogging.h new file mode 100644 index 00000000000..2f1764e924a --- /dev/null +++ b/src/daemons/SetupLogging.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#ifndef SETUPLOGGING_H +#define SETUPLOGGING_H + +#include + +#include "common/base/Status.h" +/** + * \param exe: program name. + * \return wether successfully setupLogging. + * + */ +nebula::Status setupLogging(const std::string &exe); +#endif diff --git a/src/daemons/StorageDaemon.cpp b/src/daemons/StorageDaemon.cpp index 0b7c6f0a104..e40625b531d 100644 --- a/src/daemons/StorageDaemon.cpp +++ b/src/daemons/StorageDaemon.cpp @@ -12,6 +12,7 @@ #include "common/network/NetworkUtils.h" #include "common/process/ProcessUtils.h" #include "common/time/TimezoneInfo.h" +#include "daemons/SetupLogging.h" #include "storage/StorageServer.h" #include "storage/stats/StorageStats.h" #include "version/Version.h" @@ -46,7 +47,6 @@ using nebula::network::NetworkUtils; static void signalHandler(int sig); static Status setupSignalHandler(); -extern Status setupLogging(); #if defined(__x86_64__) extern Status setupBreakpad(); #endif @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, false); // Setup logging - auto status = setupLogging(); + auto status = setupLogging(argv[0]); if (!status.ok()) { LOG(ERROR) << status; return EXIT_FAILURE; From 2a4be173626a031bc9696b6a2a02cbe3848691a6 Mon Sep 17 00:00:00 2001 From: shylock <33566796+Shylock-Hg@users.noreply.github.com> Date: Tue, 28 Dec 2021 10:02:20 +0800 Subject: [PATCH 8/8] Make scan limit error more friendly (#3549) * Make scan limit error more friendly. * Fix test case. * Fix cases. 
Co-authored-by: Sophie <84560950+Sophie-Xie@users.noreply.github.com> --- src/graph/executor/query/ScanEdgesExecutor.cpp | 4 +++- src/graph/executor/query/ScanVerticesExecutor.cpp | 4 +++- tests/tck/features/match/Base.IntVid.feature | 14 +++++++------- tests/tck/features/match/Base.feature | 14 +++++++------- tests/tck/features/match/Scan.feature | 12 ++++++------ tests/tck/features/match/SeekByEdge.feature | 8 ++++---- tests/tck/features/match/SeekById.feature | 10 +++++----- tests/tck/features/match/SeekById.intVid.feature | 10 +++++----- 8 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/graph/executor/query/ScanEdgesExecutor.cpp b/src/graph/executor/query/ScanEdgesExecutor.cpp index cdca90b3eb0..7f07759a549 100644 --- a/src/graph/executor/query/ScanEdgesExecutor.cpp +++ b/src/graph/executor/query/ScanEdgesExecutor.cpp @@ -26,7 +26,9 @@ folly::Future ScanEdgesExecutor::scanEdges() { StorageClient *client = qctx()->getStorageClient(); auto *se = asNode(node()); if (se->limit() < 0) { - return Status::Error("Scan edges must specify limit number."); + return Status::Error( + "Scan vertices or edges need to specify a limit number, " + "or limit number can not push down."); } time::Duration scanEdgesTime; diff --git a/src/graph/executor/query/ScanVerticesExecutor.cpp b/src/graph/executor/query/ScanVerticesExecutor.cpp index b7510fc87a7..490990c29a2 100644 --- a/src/graph/executor/query/ScanVerticesExecutor.cpp +++ b/src/graph/executor/query/ScanVerticesExecutor.cpp @@ -25,7 +25,9 @@ folly::Future ScanVerticesExecutor::scanVertices() { auto *sv = asNode(node()); if (sv->limit() < 0) { - return Status::Error("Scan vertices must specify limit number."); + return Status::Error( + "Scan vertices or edges need to specify a limit number," + " or limit number can not push down."); } StorageClient *storageClient = qctx()->getStorageClient(); diff --git a/tests/tck/features/match/Base.IntVid.feature b/tests/tck/features/match/Base.IntVid.feature index 36d9c69d83e..0808dc173bc 100644 --- a/tests/tck/features/match/Base.IntVid.feature +++ b/tests/tck/features/match/Base.IntVid.feature @@ -487,36 +487,36 @@ Feature: Basic match """ MATCH (v) return v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v{name: "Tim Duncan"}) return v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v:player:bachelor) RETURN v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v:player{age:23}:bachelor) RETURN v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH () -[]-> (v) return * """ - Then a ExecutionError should be raised at runtime: Scan edges must specify limit number. 
+ Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH () --> (v) --> () return * """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. # The 0 step means node scan in fact, but p and t has no label or properties for index seek # So it's not workable now When executing query: """ MATCH (p)-[:serve*0..3]->(t) RETURN p """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. diff --git a/tests/tck/features/match/Base.feature b/tests/tck/features/match/Base.feature index 2177de5273e..def650c2de9 100644 --- a/tests/tck/features/match/Base.feature +++ b/tests/tck/features/match/Base.feature @@ -596,36 +596,36 @@ Feature: Basic match """ MATCH (v) return v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v{name: "Tim Duncan"}) return v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v:player:bachelor) RETURN v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH (v:player{age:23}:bachelor) RETURN v """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH () -[]-> (v) return * """ - Then a ExecutionError should be raised at runtime: Scan edges must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. When executing query: """ MATCH () --> (v) --> () return * """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. # The 0 step means node scan in fact, but p and t has no label or properties for index seek # So it's not workable now When executing query: """ MATCH (p)-[:serve*0..3]->(t) RETURN p """ - Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number. + Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down. 
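The guard that produces this message is duplicated verbatim in ScanVerticesExecutor and ScanEdgesExecutor (see the executor hunks above). A minimal sketch of how the shared check could be factored out; the helper name checkScanLimit and the constant kScanLimitError are illustrative assumptions, not part of this patch:

    // Sketch only: one shared message so the two executors cannot drift apart.
    #include "common/base/Status.h"

    constexpr char kScanLimitError[] =
        "Scan vertices or edges need to specify a limit number, "
        "or limit number can not push down.";

    template <typename ScanNode>
    nebula::Status checkScanLimit(const ScanNode* scan) {
      // limit() < 0 means the optimizer could not push a LIMIT down to storage.
      if (scan->limit() < 0) {
        return nebula::Status::Error(kScanLimitError);
      }
      return nebula::Status::OK();
    }

Each executor could then return checkScanLimit(node) before issuing the storage RPC, instead of building the string inline.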
diff --git a/tests/tck/features/match/Scan.feature b/tests/tck/features/match/Scan.feature
index 5204849b9b8..4c3a8430299 100644
--- a/tests/tck/features/match/Scan.feature
+++ b/tests/tck/features/match/Scan.feature
@@ -84,14 +84,14 @@ Feature: Match seek by scan
       MATCH (v)
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v{name: "Mary"})
       RETURN v.name AS Name
       LIMIT 3
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
 
   Scenario: query edge by scan
     When executing query:
@@ -147,25 +147,25 @@ Feature: Match seek by scan
       MATCH ()-[e]->()
       RETURN type(e) AS Type
       """
-    Then a ExecutionError should be raised at runtime: Scan edges must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)-[e]->()
       RETURN v.name, type(e) AS Type
       LIMIT 3
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH ()-[e:is_teacher]-()
       RETURN type(e) AS Type, e.start_year AS StartYear, e.end_year AS EndYear
       LIMIT 3
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH ()-[e]-()
       RETURN type(e) AS Type, e.start_year AS StartYear, e.end_year AS EndYear
       LIMIT 3
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
diff --git a/tests/tck/features/match/SeekByEdge.feature b/tests/tck/features/match/SeekByEdge.feature
index 51c1983eb0b..4e91b88ec65 100644
--- a/tests/tck/features/match/SeekByEdge.feature
+++ b/tests/tck/features/match/SeekByEdge.feature
@@ -1469,7 +1469,7 @@ Feature: Match seek by edge
       MATCH (p1)-[:teammate]->(p2)
       RETURN p1.name, id(p2)
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
 
   Scenario Outline: seek by edge in a single edge type space
     Given an empty graph
@@ -1490,16 +1490,16 @@ Feature: Match seek by edge
       MATCH (p1)-[]->(p2)
       RETURN p1.name, id(p2)
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (p1)-[b]->(p2)
       RETURN p1.name, id(p2)
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (p1)-[:edge_1]->(p2)
       RETURN p1.name, id(p2)
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
diff --git a/tests/tck/features/match/SeekById.feature b/tests/tck/features/match/SeekById.feature
index c3dd7f5ba72..ebc583aa433 100644
--- a/tests/tck/features/match/SeekById.feature
+++ b/tests/tck/features/match/SeekById.feature
@@ -222,14 +222,14 @@ Feature: Match seek by id
       WHERE NOT id(v) == 'Paul Gasol'
       RETURN v.name AS Name, v.age AS Age
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
       WHERE NOT id(v) IN ['James Harden', 'Jonathon Simmons', 'Klay Thompson', 'Dejounte Murray']
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -237,7 +237,7 @@ Feature: Match seek by id
       OR v.age == 23
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -245,7 +245,7 @@ Feature: Match seek by id
       OR v.age == 23
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -266,7 +266,7 @@ Feature: Match seek by id
       WHERE id(v) IN ['James Harden', v.name]
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
 
   Scenario: Start from end
     When executing query:
diff --git a/tests/tck/features/match/SeekById.intVid.feature b/tests/tck/features/match/SeekById.intVid.feature
index fb5fa4db1c2..e0ee24255b9 100644
--- a/tests/tck/features/match/SeekById.intVid.feature
+++ b/tests/tck/features/match/SeekById.intVid.feature
@@ -222,14 +222,14 @@ Feature: Match seek by id
       WHERE NOT id(v) == hash('Paul Gasol')
       RETURN v.name AS Name, v.age AS Age
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
       WHERE NOT id(v) IN [hash('James Harden'), hash('Jonathon Simmons'), hash('Klay Thompson'), hash('Dejounte Murray')]
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -237,7 +237,7 @@ Feature: Match seek by id
       OR v.age == 23
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -245,7 +245,7 @@ Feature: Match seek by id
       OR v.age == 23
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
     When executing query:
       """
       MATCH (v)
@@ -259,7 +259,7 @@ Feature: Match seek by id
       WHERE id(v) IN [hash('James Harden'), v.name]
       RETURN v.name AS Name
       """
-    Then a ExecutionError should be raised at runtime: Scan vertices must specify limit number.
+    Then a ExecutionError should be raised at runtime: Scan vertices or edges need to specify a limit number, or limit number can not push down.
 
   Scenario: with arithmetic
     When executing query: