|
18 | 18 | #include "runtime/exec_env.h"
|
19 | 19 |
|
20 | 20 | #include <gen_cpp/HeartbeatService_types.h>
|
| 21 | +#include <glog/logging.h> |
21 | 22 |
|
22 | 23 | #include <mutex>
|
23 | 24 | #include <utility>
|
24 | 25 |
|
25 | 26 | #include "common/config.h"
|
| 27 | +#include "common/logging.h" |
26 | 28 | #include "olap/olap_define.h"
|
27 | 29 | #include "olap/storage_engine.h"
|
28 | 30 | #include "olap/tablet_manager.h"
|
@@ -119,34 +121,25 @@ std::map<TNetworkAddress, FrontendInfo> ExecEnv::get_running_frontends() {
|
119 | 121 | const auto now = GetCurrentTimeMicros() / 1000;
|
120 | 122 |
|
121 | 123 | for (const auto& pair : _frontends) {
|
122 |
| - if (pair.second.info.process_uuid != 0) { |
123 |
| - if (now - pair.second.last_reveiving_time_ms < expired_duration) { |
| 124 | + auto& brpc_addr = pair.first; |
| 125 | + auto& fe_info = pair.second; |
| 126 | + |
| 127 | + if (fe_info.info.process_uuid == 0) { |
| 128 | + // FE is in an unknown state, regard it as alive to be conservative. |
| 129 | + res[brpc_addr] = fe_info; |
| 130 | + } else { |
| 131 | + if (now - fe_info.last_reveiving_time_ms < expired_duration) { |
124 | 132 | // If fe info has been updated within the last expired_duration, regard it as running.
|
125 |
| - res[pair.first] = pair.second; |
| 133 | + res[brpc_addr] = fe_info; |
126 | 134 | } else {
|
127 | 135 | // Fe info has not been updated for more than expired_duration, regard it as abnormal.
|
128 | 136 | // Abnormal means this fe can not connect to master, and it is not dropped from cluster.
|
129 | 137 | // or fe do not have master yet.
|
130 |
| - LOG(INFO) << "Frontend " << PrintFrontendInfo(pair.second.info) |
131 |
| - << " has not update its hb " |
132 |
| - << "for more than " << config::fe_expire_duration_seconds |
133 |
| - << " secs, regard it as abnormal."; |
| 138 | + LOG_EVERY_N(WARNING, 50) << fmt::format( |
| 139 | + "Frontend {}:{} has not update its hb for more than {} secs, regard it as " |
| 140 | + "abnormal", |
| 141 | + brpc_addr.hostname, brpc_addr.port, config::fe_expire_duration_seconds); |
134 | 142 | }
|
135 |
| - |
136 |
| - continue; |
137 |
| - } |
138 |
| - |
139 |
| - if (pair.second.last_reveiving_time_ms - pair.second.first_receiving_time_ms > |
140 |
| - expired_duration) { |
141 |
| - // A zero process-uuid that sustains more than 60 seconds(default). |
142 |
| - // We will regard this fe as a abnormal frontend. |
143 |
| - LOG(INFO) << "Frontend " << PrintFrontendInfo(pair.second.info) |
144 |
| - << " has not update its hb " |
145 |
| - << "for more than " << config::fe_expire_duration_seconds |
146 |
| - << " secs, regard it as abnormal."; |
147 |
| - continue; |
148 |
| - } else { |
149 |
| - res[pair.first] = pair.second; |
150 | 143 | }
|
151 | 144 | }
|
152 | 145 |
|
|
0 commit comments