diff --git a/conf/nebula-graphd.conf.default b/conf/nebula-graphd.conf.default index 69eee083941..da2efa27c98 100644 --- a/conf/nebula-graphd.conf.default +++ b/conf/nebula-graphd.conf.default @@ -86,3 +86,5 @@ ########## experimental feature ########## # if use experimental features --enable_experimental_feature=false + +--enable_space_level_metrics=false diff --git a/conf/nebula-graphd.conf.production b/conf/nebula-graphd.conf.production index 3c1518f90cd..67798fa6942 100644 --- a/conf/nebula-graphd.conf.production +++ b/conf/nebula-graphd.conf.production @@ -84,3 +84,5 @@ ########## experimental feature ########## # if use experimental features --enable_experimental_feature=false + +--enable_space_level_metrics=false diff --git a/src/common/stats/StatsManager.cpp b/src/common/stats/StatsManager.cpp index 4b92cbbf271..6db76f83be7 100644 --- a/src/common/stats/StatsManager.cpp +++ b/src/common/stats/StatsManager.cpp @@ -5,7 +5,9 @@ #include "common/stats/StatsManager.h" +#include #include +#include #include "common/base/Base.h" @@ -51,7 +53,7 @@ void StatsManager::parseStats(const folly::StringPiece stats, // Percentile double pct; if (strToPct(trimmedPart, pct)) { - percentiles.push_back(std::make_pair(trimmedPart, pct)); + percentiles.emplace_back(trimmedPart, pct); } else { LOG(ERROR) << "\"" << trimmedPart << "\" is not a valid percentile form"; } @@ -63,38 +65,45 @@ void StatsManager::parseStats(const folly::StringPiece stats, // static CounterId StatsManager::registerStats(folly::StringPiece counterName, std::string stats) { - using std::chrono::seconds; - - auto& sm = get(); std::vector methods; std::vector> percentiles; parseStats(stats, methods, percentiles); + return registerStats(counterName, methods); +} + +CounterId StatsManager::registerStats(folly::StringPiece counterName, + std::vector methods) { + using std::chrono::seconds; + + auto& sm = get(); std::string name = counterName.toString(); folly::RWSpinLock::WriteHolder wh(sm.nameMapLock_); auto it = sm.nameMap_.find(name); if (it != sm.nameMap_.end()) { - DCHECK_GT(it->second.id_.index(), 0); + DCHECK(!it->second.id_.isHisto()); VLOG(2) << "The counter \"" << name << "\" already exists"; it->second.methods_ = methods; - return it->second.id_.index(); + return it->second.id_; } // Insert the Stats - sm.stats_.emplace_back(std::make_pair( - std::make_unique(), - std::make_unique(60, - std::initializer_list( - {seconds(5), seconds(60), seconds(600), seconds(3600)})))); - int32_t index = sm.stats_.size(); - sm.nameMap_.emplace( + sm.stats_.emplace( + counterName, + std::make_pair(std::make_unique(), + std::make_unique( + 60, + std::initializer_list( + {seconds(5), seconds(60), seconds(600), seconds(3600)})))); + std::string index(counterName); + auto it2 = sm.nameMap_.emplace( std::piecewise_construct, std::forward_as_tuple(std::move(name)), std::forward_as_tuple( - index, std::move(methods), std::vector>())); + index, std::move(methods), std::vector>(), false)); VLOG(1) << "Registered stats " << counterName.toString(); - return index; + return it2.first->second.id_; } // static @@ -103,39 +112,147 @@ CounterId StatsManager::registerHisto(folly::StringPiece counterName, StatsManager::VT min, StatsManager::VT max, std::string stats) { - using std::chrono::seconds; std::vector methods; std::vector> percentiles; parseStats(stats, methods, percentiles); + return registerHisto(counterName, bucketSize, min, max, methods, percentiles); +} +// static +CounterId StatsManager::registerHisto(folly::StringPiece counterName, + StatsManager::VT bucketSize, + StatsManager::VT min, + StatsManager::VT max, + std::vector methods, + std::vector> percentiles) { + using std::chrono::seconds; auto& sm = get(); std::string name = counterName.toString(); folly::RWSpinLock::WriteHolder wh(sm.nameMapLock_); auto it = sm.nameMap_.find(name); if (it != sm.nameMap_.end()) { - DCHECK_LT(it->second.id_.index(), 0); + DCHECK(it->second.id_.isHisto()); VLOG(2) << "The counter \"" << name << "\" already exists"; it->second.methods_ = methods; it->second.percentiles_ = percentiles; - return it->second.id_.index(); + return it->second.id_; } // Insert the Histogram - sm.histograms_.emplace_back( + sm.histograms_.emplace( + counterName, std::make_pair(std::make_unique(), std::make_unique( bucketSize, min, max, StatsType(60, {seconds(5), seconds(60), seconds(600), seconds(3600)})))); - int32_t index = -sm.histograms_.size(); - sm.nameMap_.emplace(std::piecewise_construct, - std::forward_as_tuple(std::move(name)), - std::forward_as_tuple(index, std::move(methods), std::move(percentiles))); + std::string index(counterName); + auto it2 = sm.nameMap_.emplace( + std::piecewise_construct, + std::forward_as_tuple(std::move(name)), + std::forward_as_tuple( + index, std::move(methods), std::move(percentiles), true, bucketSize, min, max)); VLOG(1) << "Registered histogram " << counterName.toString() << " [bucketSize: " << bucketSize << ", min value: " << min << ", max value: " << max << "]"; - return index; + return it2.first->second.id_; +} + +// static +CounterId StatsManager::counterWithLabels(const CounterId& id, + const std::vector& labels) { + using std::chrono::seconds; + + auto& sm = get(); + auto index = id.index(); + CHECK(!labels.empty()); + std::string newIndex; + for (auto& [k, v] : labels) { + newIndex.append(k).append("_").append(v).append("_"); + } + newIndex.append(index); + auto it = sm.nameMap_.find(newIndex); + // Get the counter if it already exists + if (it != sm.nameMap_.end()) { + return it->second.id_; + } + + // Register a new counter if it doesn't exist + auto it2 = sm.nameMap_.find(index); + DCHECK(it2 != sm.nameMap_.end()); + auto& methods = it2->second.methods_; + + return registerStats(newIndex, methods); +} + +// static +CounterId StatsManager::histoWithLabels(const CounterId& id, const std::vector& labels) { + using std::chrono::seconds; + + auto& sm = get(); + auto index = id.index(); + CHECK(!labels.empty()); + std::string newIndex; + for (auto& [k, v] : labels) { + newIndex.append(k).append("_").append(v).append("_"); + } + newIndex.append(index); + auto it = sm.nameMap_.find(newIndex); + // Get the counter if it already exists + if (it != sm.nameMap_.end()) { + return it->second.id_; + } + + auto it2 = sm.nameMap_.find(index); + DCHECK(it2 != sm.nameMap_.end()); + auto& methods = it2->second.methods_; + auto& percentiles = it2->second.percentiles_; + + return registerHisto( + newIndex, it2->second.bucketSize_, it2->second.min_, it2->second.max_, methods, percentiles); +} + +// static +void StatsManager::removeCounterWithLabels(const CounterId& id, + const std::vector& labels) { + using std::chrono::seconds; + + auto& sm = get(); + auto index = id.index(); + CHECK(!labels.empty()); + std::string newIndex; + for (auto& [k, v] : labels) { + newIndex.append(k).append("_").append(v).append("_"); + } + newIndex.append(index); + folly::RWSpinLock::WriteHolder wh(sm.nameMapLock_); + auto it = sm.nameMap_.find(newIndex); + if (it != sm.nameMap_.end()) { + sm.nameMap_.erase(it); + } + sm.stats_.erase(newIndex); +} + +// static +void StatsManager::removeHistoWithLabels(const CounterId& id, + const std::vector& labels) { + using std::chrono::seconds; + + auto& sm = get(); + auto index = id.index(); + CHECK(!labels.empty()); + std::string newIndex; + for (auto& [k, v] : labels) { + newIndex.append(k).append("_").append(v).append("_"); + } + newIndex.append(index); + folly::RWSpinLock::WriteHolder wh(sm.nameMapLock_); + auto it = sm.nameMap_.find(newIndex); + if (it != sm.nameMap_.end()) { + sm.nameMap_.erase(it); + } + sm.histograms_.erase(newIndex); } // static @@ -143,21 +260,21 @@ void StatsManager::addValue(const CounterId& id, VT value) { using std::chrono::seconds; auto& sm = get(); - int32_t index = id.index(); - if (index > 0) { + if (!id.valid()) { + LOG(FATAL) << "Invalid counter id"; + } + std::string index = id.index(); + bool isHisto = id.isHisto(); + if (!isHisto) { // Stats - --index; - DCHECK_LT(index, sm.stats_.size()); + DCHECK(sm.stats_.find(index) != sm.stats_.end()); std::lock_guard g(*(sm.stats_[index].first)); sm.stats_[index].second->addValue(seconds(time::WallClock::fastNowInSec()), value); - } else if (index < 0) { + } else { // Histogram - index = -(index + 1); - DCHECK_LT(index, sm.histograms_.size()); + DCHECK(sm.histograms_.find(index) != sm.histograms_.end()); std::lock_guard g(*(sm.histograms_[index].first)); sm.histograms_[index].second->addValue(seconds(time::WallClock::fastNowInSec()), value); - } else { - LOG(FATAL) << "Invalid counter id"; } } @@ -325,23 +442,21 @@ StatusOr StatsManager::readStats(const CounterId& id, StatsManager::StatsMethod method) { using std::chrono::seconds; auto& sm = get(); - int32_t index = id.index(); + std::string index = id.index(); - if (index == 0) { + if (index == "") { return Status::Error("Invalid stats"); } - if (index > 0) { + if (!id.isHisto()) { // stats - --index; - DCHECK_LT(index, sm.stats_.size()); + DCHECK(sm.stats_.find(index) != sm.stats_.end()); std::lock_guard g(*(sm.stats_[index].first)); sm.stats_[index].second->update(seconds(time::WallClock::fastNowInSec())); return readValue(*(sm.stats_[index].second), range, method); } else { // histograms_ - index = -(index + 1); - DCHECK_LT(index, sm.histograms_.size()); + DCHECK(sm.histograms_.find(index) != sm.histograms_.end()); std::lock_guard g(*(sm.histograms_[index].first)); sm.histograms_[index].second->update(seconds(time::WallClock::fastNowInSec())); return readValue(*(sm.histograms_[index].second), range, method); @@ -376,13 +491,14 @@ StatusOr StatsManager::readHisto(const CounterId& id, double pct) { using std::chrono::seconds; auto& sm = get(); - int32_t index = id.index(); + std::string index = id.index(); - if (index >= 0) { + if (!id.isHisto()) { return Status::Error("Invalid stats"); } - index = -(index + 1); - if (static_cast(index) >= sm.histograms_.size()) { + + auto it = sm.histograms_.find(index); + if (it == sm.histograms_.end()) { return Status::Error("Invalid stats"); } diff --git a/src/common/stats/StatsManager.h b/src/common/stats/StatsManager.h index cc2dfe75f75..52dac86a60c 100644 --- a/src/common/stats/StatsManager.h +++ b/src/common/stats/StatsManager.h @@ -21,8 +21,9 @@ namespace stats { // A wrapper class of counter index. Each instance can only be writtern once. class CounterId final { public: - CounterId() : index_{0} {} - CounterId(int32_t index) : index_{index} {} // NOLINT + CounterId() = default; + CounterId(const std::string& index, bool isHisto = false) // NOLINT + : index_{index}, isHisto_(isHisto) {} CounterId(const CounterId&) = default; CounterId& operator=(const CounterId& right) { @@ -34,11 +35,12 @@ class CounterId final { LOG(FATAL) << "CounterId cannot be assigned twice"; } index_ = right.index_; + isHisto_ = right.isHisto_; return *this; } - CounterId& operator=(int32_t right) { - if (right == 0) { + CounterId& operator=(const std::string& right) { + if (right == "") { LOG(FATAL) << "Invalid counter id"; } if (valid()) { @@ -49,12 +51,15 @@ class CounterId final { return *this; } - bool valid() const { return index_ != 0; } + bool valid() const { return index_ != ""; } - int32_t index() const { return index_; } + bool isHisto() const { return isHisto_; } + + std::string index() const { return index_; } private: - int32_t index_; + std::string index_; + bool isHisto_{false}; }; /** @@ -84,6 +89,7 @@ class StatsManager final { using VT = int64_t; using StatsType = folly::MultiLevelTimeSeries; using HistogramType = folly::TimeseriesHistogram; + using LabelPair = std::pair; public: enum class StatsMethod { SUM = 1, COUNT, AVG, RATE }; @@ -110,8 +116,21 @@ class StatsManager final { // those specified in the parameter **stats**. If **stats** is empty, nothing // will return from readAllValue() static CounterId registerStats(folly::StringPiece counterName, std::string stats); + static CounterId registerStats(folly::StringPiece counterName, std::vector methods); static CounterId registerHisto( folly::StringPiece counterName, VT bucketSize, VT min, VT max, std::string stats); + static CounterId registerHisto(folly::StringPiece counterName, + VT bucketSize, + VT min, + VT max, + std::vector methods, + std::vector> percentiles); + + static CounterId counterWithLabels(const CounterId& id, const std::vector& labels); + static CounterId histoWithLabels(const CounterId& id, const std::vector& labels); + + static void removeCounterWithLabels(const CounterId& id, const std::vector& labels); + static void removeHistoWithLabels(const CounterId& id, const std::vector& labels); static void addValue(const CounterId& id, VT value = 1); static void decValue(const CounterId& id, VT value = 1); @@ -153,11 +172,21 @@ class StatsManager final { CounterId id_; std::vector methods_; std::vector> percentiles_; + VT bucketSize_, min_, max_; - CounterInfo(int32_t index, + CounterInfo(const std::string& index, std::vector&& methods, - std::vector>&& percentiles) - : id_(index), methods_(std::move(methods)), percentiles_(std::move(percentiles)) {} + std::vector>&& percentiles, + bool isHisto = false, + VT bucketSize = VT(), + VT min = VT(), + VT max = VT()) + : id_(index, isHisto), + methods_(std::move(methods)), + percentiles_(std::move(percentiles)), + bucketSize_(bucketSize), + min_(min), + max_(max) {} }; std::string domain_; @@ -171,10 +200,14 @@ class StatsManager final { std::unordered_map nameMap_; // All time series stats - std::vector, std::unique_ptr>> stats_; + std::unordered_map, std::unique_ptr>> + stats_; // All histogram stats - std::vector, std::unique_ptr>> histograms_; + std::unordered_map, std::unique_ptr>> + histograms_; }; } // namespace stats diff --git a/src/graph/executor/admin/SpaceExecutor.cpp b/src/graph/executor/admin/SpaceExecutor.cpp index f2a452482fc..cb18bdfd6c7 100644 --- a/src/graph/executor/admin/SpaceExecutor.cpp +++ b/src/graph/executor/admin/SpaceExecutor.cpp @@ -5,10 +5,12 @@ #include "graph/executor/admin/SpaceExecutor.h" +#include "common/stats/StatsManager.h" #include "common/time/ScopedTimer.h" #include "graph/context/QueryContext.h" #include "graph/planner/plan/Admin.h" #include "graph/service/PermissionManager.h" +#include "graph/stats/StatsDef.h" #include "graph/util/FTIndexUtils.h" #include "graph/util/SchemaUtil.h" @@ -140,6 +142,7 @@ folly::Future DropSpaceExecutor::execute() { LOG(ERROR) << "Drop space `" << dsNode->getSpaceName() << "' failed: " << resp.status(); return resp.status(); } + unRegisterSpaceLevelMetrics(dsNode->getSpaceName()); if (dsNode->getSpaceName() == qctx()->rctx()->session()->space().name) { SpaceInfo spaceInfo; spaceInfo.name = ""; @@ -163,6 +166,16 @@ folly::Future DropSpaceExecutor::execute() { }); } +void DropSpaceExecutor::unRegisterSpaceLevelMetrics(const std::string &spaceName) { + if (FLAGS_enable_space_level_metrics) { + stats::StatsManager::removeCounterWithLabels(kNumQueries, {{"space", spaceName}}); + stats::StatsManager::removeCounterWithLabels(kNumSlowQueries, {{"space", spaceName}}); + stats::StatsManager::removeCounterWithLabels(kNumQueryErrors, {{"space", spaceName}}); + stats::StatsManager::removeHistoWithLabels(kQueryLatencyUs, {{"space", spaceName}}); + stats::StatsManager::removeHistoWithLabels(kSlowQueryLatencyUs, {{"space", spaceName}}); + } +} + folly::Future ShowSpacesExecutor::execute() { SCOPED_TIMER(&execTime_); diff --git a/src/graph/executor/admin/SpaceExecutor.h b/src/graph/executor/admin/SpaceExecutor.h index e0c917d93f0..f61e5e3e95a 100644 --- a/src/graph/executor/admin/SpaceExecutor.h +++ b/src/graph/executor/admin/SpaceExecutor.h @@ -40,6 +40,8 @@ class DropSpaceExecutor final : public Executor { DropSpaceExecutor(const PlanNode *node, QueryContext *qctx) : Executor("DropSpaceExecutor", node, qctx) {} + void unRegisterSpaceLevelMetrics(const std::string &spaceName); + folly::Future execute() override; }; diff --git a/src/graph/executor/test/CMakeLists.txt b/src/graph/executor/test/CMakeLists.txt index 2dc394bfeb0..137117153ff 100644 --- a/src/graph/executor/test/CMakeLists.txt +++ b/src/graph/executor/test/CMakeLists.txt @@ -12,6 +12,7 @@ SET(EXEC_QUERY_TEST_OBJS $ $ $ + $ $ $ $ diff --git a/src/graph/planner/test/CMakeLists.txt b/src/graph/planner/test/CMakeLists.txt index bb45ebd9201..02aa6b5edb0 100644 --- a/src/graph/planner/test/CMakeLists.txt +++ b/src/graph/planner/test/CMakeLists.txt @@ -18,6 +18,7 @@ nebula_add_test( $ $ $ + $ $ $ $ diff --git a/src/graph/service/GraphService.cpp b/src/graph/service/GraphService.cpp index 7fcdf14bddf..65789d0deba 100644 --- a/src/graph/service/GraphService.cpp +++ b/src/graph/service/GraphService.cpp @@ -195,6 +195,9 @@ folly::Future GraphService::future_execute(int64_t sessionId, new std::string(folly::stringPrintf("SessionId[%ld] does not exist", sessionId))); return ctx->finish(); } + stats::StatsManager::addValue(kNumQueries); + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumQueries, {{"space", sessionPtr->space().name}})); ctx->setSession(std::move(sessionPtr)); queryEngine_->execute(std::move(ctx)); stats::StatsManager::decValue(kNumActiveQueries); diff --git a/src/graph/service/QueryInstance.cpp b/src/graph/service/QueryInstance.cpp index 358d357033a..be2d6b709aa 100644 --- a/src/graph/service/QueryInstance.cpp +++ b/src/graph/service/QueryInstance.cpp @@ -101,7 +101,7 @@ void QueryInstance::onFinish() { auto latency = rctx->duration().elapsedInUSec(); rctx->resp().latencyInUs = latency; - addSlowQueryStats(latency); + addSlowQueryStats(latency, spaceName); rctx->finish(); rctx->session()->deleteQuery(qctx_.get()); @@ -161,17 +161,30 @@ void QueryInstance::onError(Status status) { auto latency = rctx->duration().elapsedInUSec(); rctx->resp().latencyInUs = latency; stats::StatsManager::addValue(kNumQueryErrors); - addSlowQueryStats(latency); + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumQueryErrors, {{"space", spaceName}})); + addSlowQueryStats(latency, spaceName); rctx->session()->deleteQuery(qctx_.get()); rctx->finish(); delete this; } -void QueryInstance::addSlowQueryStats(uint64_t latency) const { +void QueryInstance::addSlowQueryStats(uint64_t latency, const std::string &spaceName) const { stats::StatsManager::addValue(kQueryLatencyUs, latency); + if (FLAGS_enable_space_level_metrics) { + stats::StatsManager::addValue( + stats::StatsManager::histoWithLabels(kQueryLatencyUs, {{"space", spaceName}}), latency); + } if (latency > static_cast(FLAGS_slow_query_threshold_us)) { stats::StatsManager::addValue(kNumSlowQueries); stats::StatsManager::addValue(kSlowQueryLatencyUs, latency); + if (FLAGS_enable_space_level_metrics) { + stats::StatsManager::addValue( + stats::StatsManager::counterWithLabels(kNumSlowQueries, {{"space", spaceName}})); + stats::StatsManager::addValue( + stats::StatsManager::histoWithLabels(kSlowQueryLatencyUs, {{"space", spaceName}}), + latency); + } } } diff --git a/src/graph/service/QueryInstance.h b/src/graph/service/QueryInstance.h index c8c51bb46cf..d9d9c144146 100644 --- a/src/graph/service/QueryInstance.h +++ b/src/graph/service/QueryInstance.h @@ -49,7 +49,7 @@ class QueryInstance final : public cpp::NonCopyable, public cpp::NonMovable { Status validateAndOptimize(); // return true if continue to execute bool explainOrContinue(); - void addSlowQueryStats(uint64_t latency) const; + void addSlowQueryStats(uint64_t latency, const std::string& spaceName) const; void fillRespData(ExecutionResponse* resp); Status findBestPlan(); diff --git a/src/graph/stats/StatsDef.cpp b/src/graph/stats/StatsDef.cpp index 3143194bdc2..7fa32041934 100644 --- a/src/graph/stats/StatsDef.cpp +++ b/src/graph/stats/StatsDef.cpp @@ -12,6 +12,7 @@ DEFINE_int32(slow_query_threshold_us, 200000, "Any query slower than this threshold value will be considered" " as a slow query"); +DEFINE_bool(enable_space_level_metrics, false, "Whether to enable space level metrircs"); namespace nebula { diff --git a/src/graph/stats/StatsDef.h b/src/graph/stats/StatsDef.h index 32848ae24ef..58ea244156d 100644 --- a/src/graph/stats/StatsDef.h +++ b/src/graph/stats/StatsDef.h @@ -10,6 +10,7 @@ #include "common/stats/StatsManager.h" DECLARE_int32(slow_query_threshold_us); +DECLARE_bool(enable_space_level_metrics); namespace nebula {