Skip to content

Commit

Permalink
Add overload manager to bootstrap config (#4038)
Browse files Browse the repository at this point in the history
Initialize on startup and add documentation (issue #373)

Risk Level: low
Testing: unit tests
Docs Changes: add docs for overload manager

Signed-off-by: Elisha Ziskind <eziskind@google.com>
  • Loading branch information
eziskind authored and htuch committed Aug 7, 2018
1 parent b14dee5 commit 14140ad
Show file tree
Hide file tree
Showing 30 changed files with 182 additions and 21 deletions.
1 change: 1 addition & 0 deletions api/docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ proto_library(
"//envoy/config/metrics/v2:stats",
"//envoy/config/ratelimit/v2:rls",
"//envoy/config/rbac/v2alpha:rbac",
"//envoy/config/resource_monitor/fixed_heap/v2alpha:fixed_heap",
"//envoy/config/trace/v2:trace",
"//envoy/config/transport_socket/capture/v2alpha:capture",
"//envoy/data/accesslog/v2:accesslog",
Expand Down
2 changes: 2 additions & 0 deletions api/envoy/config/bootstrap/v2/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ api_proto_library_internal(
"//envoy/api/v2/core:config_source",
"//envoy/config/metrics/v2:metrics_service",
"//envoy/config/metrics/v2:stats",
"//envoy/config/overload/v2alpha:overload",
"//envoy/config/ratelimit/v2:rls",
"//envoy/config/trace/v2:trace",
],
Expand All @@ -32,6 +33,7 @@ api_go_proto_library(
"//envoy/api/v2/core:config_source_go_proto",
"//envoy/config/metrics/v2:metrics_service_go_proto",
"//envoy/config/metrics/v2:stats_go_proto",
"//envoy/config/overload/v2alpha:overload_go_proto",
"//envoy/config/ratelimit/v2:rls_go_grpc",
"//envoy/config/trace/v2:trace_go_proto",
],
Expand Down
4 changes: 4 additions & 0 deletions api/envoy/config/bootstrap/v2/bootstrap.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import "envoy/api/v2/cds.proto";
import "envoy/api/v2/lds.proto";
import "envoy/config/trace/v2/trace.proto";
import "envoy/config/metrics/v2/stats.proto";
import "envoy/config/overload/v2alpha/overload.proto";
import "envoy/config/ratelimit/v2/rls.proto";

import "google/protobuf/duration.proto";
Expand Down Expand Up @@ -123,6 +124,9 @@ message Bootstrap {

// Configuration for the local administration HTTP server.
Admin admin = 12 [(validate.rules).message.required = true, (gogoproto.nullable) = false];

// Optional overload manager configuration.
envoy.config.overload.v2alpha.OverloadManager overload_manager = 15;
}

// Administration interface :ref:`operations documentation
Expand Down
8 changes: 7 additions & 1 deletion api/envoy/config/overload/v2alpha/BUILD
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
load("//bazel:api_build_system.bzl", "api_proto_library_internal")
load("//bazel:api_build_system.bzl", "api_go_proto_library", "api_proto_library_internal")

licenses(["notice"]) # Apache 2

api_proto_library_internal(
name = "overload",
srcs = ["overload.proto"],
visibility = ["//visibility:public"],
)

api_go_proto_library(
name = "overload",
proto = ":overload",
)
18 changes: 11 additions & 7 deletions api/envoy/config/overload/v2alpha/overload.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@ import "google/protobuf/struct.proto";

import "validate/validate.proto";

// The Overload Manager provides an extensible framework to protect Envoy instances
// from overload of various resources (memory, cpu, file descriptors, etc)
// [#protodoc-title: Overload Manager]

message EmptyConfig {
}
// The Overload Manager provides an extensible framework to protect Envoy instances
// from overload of various resources (memory, cpu, file descriptors, etc).
// It monitors a configurable set of resources and notifies registered listeners
// when triggers related to those resources fire.

message ResourceMonitor {
// The name of the resource monitor to instantiate. Must match a registered
// resource monitor type.
// resource monitor type. The built-in resource monitors are:
//
// * :ref:`envoy.resource_monitors.fixed_heap
// <envoy_api_msg_config.resource_monitor.fixed_heap.v2alpha.FixedHeapConfig>`
string name = 1 [(validate.rules).string.min_bytes = 1];

// Configuration for the resource monitor being instantiated.
Expand Down Expand Up @@ -45,7 +49,7 @@ message OverloadAction {
// DNS to ensure uniqueness.
string name = 1 [(validate.rules).string.min_bytes = 1];

// A set of triggers for this action. If any of these triggers fires the overload action
// A set of triggers for this action. If any of these triggers fire, the overload action
// is activated. Listeners are notified when the overload action transitions from
// inactivated to activated, or vice versa.
repeated Trigger triggers = 2 [(validate.rules).repeated .min_items = 1];
Expand All @@ -59,5 +63,5 @@ message OverloadManager {
repeated ResourceMonitor resource_monitors = 2 [(validate.rules).repeated .min_items = 1];

// The set of overload actions.
repeated OverloadAction actions = 3 [(validate.rules).repeated .min_items = 1];
repeated OverloadAction actions = 3;
}
1 change: 1 addition & 0 deletions api/envoy/config/resource_monitor/fixed_heap/v2alpha/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ licenses(["notice"]) # Apache 2
api_proto_library_internal(
name = "fixed_heap",
srcs = ["fixed_heap.proto"],
visibility = ["//visibility:public"],
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ syntax = "proto3";
package envoy.config.resource_monitor.fixed_heap.v2alpha;
option go_package = "v2alpha";

// [#protodoc-title: Fixed heap]

// The fixed heap resource monitor reports the Envoy process memory pressure, computed as the
// ratio of the currently reserved heap memory to a statically configured maximum specified
// in the FixedHeapConfig.
message FixedHeapConfig {
// Limit of the Envoy process heap size, in bytes. This is used to calculate heap memory
// pressure, which is defined as (current heap size)/max_heap_size_bytes.
//
// NOTE(review): no validation rule is attached to this field here, so the schema accepts 0,
// for which the ratio above is undefined -- confirm the monitor handles a zero limit.
uint64 max_heap_size_bytes = 1;
}
2 changes: 2 additions & 0 deletions docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ PROTO_RST="
/envoy/config/filter/network/redis_proxy/v2/redis_proxy/envoy/config/filter/network/redis_proxy/v2/redis_proxy.proto.rst
/envoy/config/filter/network/tcp_proxy/v2/tcp_proxy/envoy/config/filter/network/tcp_proxy/v2/tcp_proxy.proto.rst
/envoy/config/health_checker/redis/v2/redis/envoy/config/health_checker/redis/v2/redis.proto.rst
/envoy/config/overload/v2alpha/overload/envoy/config/overload/v2alpha/overload.proto.rst
/envoy/config/rbac/v2alpha/rbac/envoy/config/rbac/v2alpha/rbac.proto.rst
/envoy/config/resource_monitor/fixed_heap/v2alpha/fixed_heap/envoy/config/resource_monitor/fixed_heap/v2alpha/fixed_heap.proto.rst
/envoy/config/transport_socket/capture/v2alpha/capture/envoy/config/transport_socket/capture/v2alpha/capture.proto.rst
/envoy/data/accesslog/v2/accesslog/envoy/data/accesslog/v2/accesslog.proto.rst
/envoy/data/core/v2alpha/health_check_event/envoy/data/core/v2alpha/health_check_event.proto.rst
Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v2/bootstrap/bootstrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ Bootstrap
../config/bootstrap/v2/bootstrap.proto
../config/metrics/v2/stats.proto
../config/metrics/v2/metrics_service.proto
../config/overload/v2alpha/overload.proto
../config/ratelimit/v2/rls.proto
../config/trace/v2/trace.proto
1 change: 1 addition & 0 deletions docs/root/api-v2/config/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Extensions
rbac/rbac
health_checker/health_checker
transport_socket/transport_socket
resource_monitor/resource_monitor
10 changes: 10 additions & 0 deletions docs/root/api-v2/config/resource_monitor/resource_monitor.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
.. _config_resource_monitors:

Resource monitors
=================

.. toctree::
:glob:
:maxdepth: 1

*/v2alpha/*
1 change: 1 addition & 0 deletions docs/root/configuration/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ Configuration reference
runtime
statistics
tools/router_check
overload_manager/overload_manager
38 changes: 38 additions & 0 deletions docs/root/configuration/overload_manager/overload_manager.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
.. _config_overload_manager:

Overload manager
================

The :ref:`overload manager <arch_overview_overload_manager>` is configured in the Bootstrap
:ref:`overload_manager <envoy_api_field_config.bootstrap.v2.Bootstrap.overload_manager>`
field.

Resource monitors
-----------------

The overload manager uses Envoy's :ref:`extension <extending>` framework for defining
resource monitors. Envoy's built-in resource monitors are listed
:ref:`here <config_resource_monitors>`.

Statistics
----------

Each configured resource monitor has a statistics tree rooted at *overload.<name>.*
with the following statistics:

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

pressure, Gauge, Resource pressure as a percent
failed_updates, Counter, Total failed attempts to update the resource pressure
skipped_updates, Counter, Total skipped attempts to update the resource pressure due to a pending update

Each configured overload action has a statistics tree rooted at *overload.<name>.*
with the following statistics:

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

active, Gauge, "Active state of the action (0=inactive, 1=active)"
1 change: 1 addition & 0 deletions docs/root/intro/arch_overview/arch_overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ Architecture overview
draining
scripting
ext_authz_filter
overload_manager
14 changes: 14 additions & 0 deletions docs/root/intro/arch_overview/overload_manager.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.. _arch_overview_overload_manager:

Overload manager
================

The overload manager is an extensible component for protecting the Envoy server from overload
with respect to various system resources (such as memory, CPU or file descriptors) due to too
many client connections or requests. This is distinct from
:ref:`circuit breaking <arch_overview_circuit_break>` which is primarily aimed at protecting
upstream services.

The overload manager is :ref:`configured <config_overload_manager>` by specifying a set of
resources to monitor and a set of overload actions that will be taken when some of those
resources exceed certain pressure thresholds.
1 change: 1 addition & 0 deletions include/envoy/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ envoy_cc_library(
"//include/envoy/ratelimit:ratelimit_interface",
"//include/envoy/runtime:runtime_interface",
"//include/envoy/secret:secret_manager_interface",
"//include/envoy/server:overload_manager_interface",
"//include/envoy/ssl:context_manager_interface",
"//include/envoy/thread_local:thread_local_interface",
"//include/envoy/tracing:http_tracer_interface",
Expand Down
6 changes: 6 additions & 0 deletions include/envoy/server/instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "envoy/server/hot_restart.h"
#include "envoy/server/listener_manager.h"
#include "envoy/server/options.h"
#include "envoy/server/overload_manager.h"
#include "envoy/ssl/context_manager.h"
#include "envoy/thread_local/thread_local.h"
#include "envoy/tracing/http_tracer.h"
Expand Down Expand Up @@ -114,6 +115,11 @@ class Instance {
*/
virtual ListenerManager& listenerManager() PURE;

/**
* @return the server's overload manager.
*/
virtual OverloadManager& overloadManager() PURE;

/**
* @return the server's secret manager
*/
Expand Down
6 changes: 6 additions & 0 deletions include/envoy/server/overload_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ class OverloadManager {
public:
virtual ~OverloadManager() {}

/**
* Start a recurring timer to monitor resources and notify listeners when overload actions
* change state.
*/
virtual void start() PURE;

/**
* Register a callback to be invoked when the specified overload action changes state
* (i.e. becomes activated or inactivated). Must be called before the start method is called.
Expand Down
27 changes: 27 additions & 0 deletions source/extensions/resource_monitors/common/factory_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,33 @@ class FactoryBase : public Server::Configuration::ResourceMonitorFactory {
const std::string name_;
};

/**
 * Base class for resource monitor factories whose monitors need no configuration. The config
 * message handed to createResourceMonitor() is ignored; subclasses only implement
 * createEmptyConfigResourceMonitor().
 */
class EmptyConfigFactoryBase : public Server::Configuration::ResourceMonitorFactory {
public:
  /**
   * Creates the monitor by delegating to createEmptyConfigResourceMonitor().
   * @param context factory context forwarded unchanged to the subclass hook.
   * @return the resource monitor produced by the subclass.
   */
  Server::ResourceMonitorPtr
  createResourceMonitor(const Protobuf::Message& /*config (unused)*/,
                        Server::Configuration::ResourceMonitorFactoryContext& context) override {
    return createEmptyConfigResourceMonitor(context);
  }

  /**
   * @return a newly allocated Envoy::ProtobufWkt::Empty message used as the config placeholder.
   */
  ProtobufTypes::MessagePtr createEmptyConfigProto() override {
    ProtobufTypes::MessagePtr empty_proto(new Envoy::ProtobufWkt::Empty());
    return empty_proto;
  }

  /**
   * @return the name this factory was constructed with.
   */
  std::string name() override { return name_; }

protected:
  /**
   * @param name the name reported by name().
   */
  EmptyConfigFactoryBase(const std::string& name) : name_(name) {}

private:
  /**
   * Subclass hook that builds the actual monitor; no config message is supplied.
   * @param context factory context passed through from createResourceMonitor().
   */
  virtual Server::ResourceMonitorPtr createEmptyConfigResourceMonitor(
      Server::Configuration::ResourceMonitorFactoryContext& context) PURE;

  const std::string name_;
};

} // namespace Common
} // namespace ResourceMonitors
} // namespace Extensions
Expand Down
1 change: 1 addition & 0 deletions source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ envoy_cc_library(
"//source/common/stats:thread_local_store_lib",
"//source/common/upstream:cluster_manager_lib",
"//source/common/upstream:health_discovery_service_lib",
"//source/server:overload_manager_lib",
"//source/server/http:admin_lib",
"@envoy_api//envoy/config/bootstrap/v2:bootstrap_cc",
],
Expand Down
1 change: 1 addition & 0 deletions source/server/config_validation/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class ValidationInstance : Logger::Loggable<Logger::Id::main>,
void shutdown() override;
void shutdownAdmin() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Singleton::Manager& singletonManager() override { return *singleton_manager_; }
OverloadManager& overloadManager() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
bool healthCheckFailed() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Options& options() override { return options_; }
time_t startTimeCurrentEpoch() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Expand Down
3 changes: 3 additions & 0 deletions source/server/overload_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ OverloadManagerImpl::OverloadManagerImpl(
void OverloadManagerImpl::start() {
ASSERT(!started_);
started_ = true;
if (resources_.empty()) {
return;
}
timer_ = dispatcher_.createTimer([this]() -> void {
for (auto& resource : resources_) {
resource.second.update();
Expand Down
3 changes: 1 addition & 2 deletions source/server/overload_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadM
OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope,
const envoy::config::overload::v2alpha::OverloadManager& config);

void start();

// Server::OverloadManager
void start() override;
void registerForAction(const std::string& action, Event::Dispatcher& dispatcher,
OverloadActionCb callback) override;

Expand Down
11 changes: 9 additions & 2 deletions source/server/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ void InstanceImpl::initialize(Options& options,

loadServerFlags(initial_config.flagsPath());

// Initialize the overload manager early so other modules can register for actions.
overload_manager_.reset(
new OverloadManagerImpl(dispatcher(), stats(), bootstrap_.overload_manager()));

// Workers get created first so they register for thread local updates.
listener_manager_.reset(new ListenerManagerImpl(
*this, listener_component_factory_, worker_factory_, ProdSystemTimeSource::instance_));
Expand Down Expand Up @@ -353,7 +357,8 @@ uint64_t InstanceImpl::numConnections() { return listener_manager_->numConnectio

RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm,
HotRestart& hot_restart, AccessLog::AccessLogManager& access_log_manager,
InitManagerImpl& init_manager, std::function<void()> workers_start_cb) {
InitManagerImpl& init_manager, OverloadManager& overload_manager,
std::function<void()> workers_start_cb) {

// Setup signals.
sigterm_ = dispatcher.listenForSignal(SIGTERM, [this, &hot_restart, &dispatcher]() {
Expand Down Expand Up @@ -400,11 +405,13 @@ RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm
// as we've subscribed to all the statically defined RDS resources.
cm.adsMux().resume(Config::TypeUrl::get().RouteConfiguration);
});

overload_manager.start();
}

void InstanceImpl::run() {
RunHelper helper(*dispatcher_, clusterManager(), restarter_, access_log_manager_, init_manager_,
[this]() -> void { startWorkers(); });
overloadManager(), [this]() -> void { startWorkers(); });

// Run the main dispatch loop waiting to exit.
ENVOY_LOG(info, "starting main dispatch loop");
Expand Down
5 changes: 4 additions & 1 deletion source/server/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "server/http/admin.h"
#include "server/init_manager_impl.h"
#include "server/listener_manager_impl.h"
#include "server/overload_manager_impl.h"
#include "server/test_hooks.h"
#include "server/worker_impl.h"

Expand Down Expand Up @@ -112,7 +113,7 @@ class RunHelper : Logger::Loggable<Logger::Id::main> {
public:
RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm, HotRestart& hot_restart,
AccessLog::AccessLogManager& access_log_manager, InitManagerImpl& init_manager,
std::function<void()> workers_start_cb);
OverloadManager& overload_manager, std::function<void()> workers_start_cb);

private:
Event::SignalEventPtr sigterm_;
Expand Down Expand Up @@ -154,6 +155,7 @@ class InstanceImpl : Logger::Loggable<Logger::Id::main>, public Instance {
Init::Manager& initManager() override { return init_manager_; }
ListenerManager& listenerManager() override { return *listener_manager_; }
Secret::SecretManager& secretManager() override { return *secret_manager_; }
OverloadManager& overloadManager() override { return *overload_manager_; }
Runtime::RandomGenerator& random() override { return *random_generator_; }
RateLimit::ClientPtr
rateLimitClient(const absl::optional<std::chrono::milliseconds>& timeout) override {
Expand Down Expand Up @@ -221,6 +223,7 @@ class InstanceImpl : Logger::Loggable<Logger::Id::main>, public Instance {
SystemTime bootstrap_config_update_time_;
Grpc::AsyncClientManagerPtr async_client_manager_;
Upstream::HdsDelegatePtr hds_delegate_;
std::unique_ptr<OverloadManagerImpl> overload_manager_;
};

} // namespace Server
Expand Down
Loading

0 comments on commit 14140ad

Please sign in to comment.