Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add overload manager to bootstrap config #4038

Merged
merged 4 commits into from
Aug 7, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/docs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ proto_library(
"//envoy/config/metrics/v2:stats",
"//envoy/config/ratelimit/v2:rls",
"//envoy/config/rbac/v2alpha:rbac",
"//envoy/config/resource_monitor/fixed_heap/v2alpha:fixed_heap",
"//envoy/config/trace/v2:trace",
"//envoy/config/transport_socket/capture/v2alpha:capture",
"//envoy/data/accesslog/v2:accesslog",
Expand Down
2 changes: 2 additions & 0 deletions api/envoy/config/bootstrap/v2/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ api_proto_library_internal(
"//envoy/api/v2/core:config_source",
"//envoy/config/metrics/v2:metrics_service",
"//envoy/config/metrics/v2:stats",
"//envoy/config/overload/v2alpha:overload",
"//envoy/config/ratelimit/v2:rls",
"//envoy/config/trace/v2:trace",
],
Expand All @@ -32,6 +33,7 @@ api_go_proto_library(
"//envoy/api/v2/core:config_source_go_proto",
"//envoy/config/metrics/v2:metrics_service_go_proto",
"//envoy/config/metrics/v2:stats_go_proto",
"//envoy/config/overload/v2alpha:overload_go_proto",
"//envoy/config/ratelimit/v2:rls_go_grpc",
"//envoy/config/trace/v2:trace_go_proto",
],
Expand Down
4 changes: 4 additions & 0 deletions api/envoy/config/bootstrap/v2/bootstrap.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import "envoy/api/v2/cds.proto";
import "envoy/api/v2/lds.proto";
import "envoy/config/trace/v2/trace.proto";
import "envoy/config/metrics/v2/stats.proto";
import "envoy/config/overload/v2alpha/overload.proto";
import "envoy/config/ratelimit/v2/rls.proto";

import "google/protobuf/duration.proto";
Expand Down Expand Up @@ -123,6 +124,9 @@ message Bootstrap {

// Configuration for the local administration HTTP server.
Admin admin = 12 [(validate.rules).message.required = true, (gogoproto.nullable) = false];

// Optional overload manager configuration.
envoy.config.overload.v2alpha.OverloadManager overload_manager = 15;
}

// Administration interface :ref:`operations documentation
Expand Down
8 changes: 7 additions & 1 deletion api/envoy/config/overload/v2alpha/BUILD
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
load("//bazel:api_build_system.bzl", "api_proto_library_internal")
load("//bazel:api_build_system.bzl", "api_go_proto_library", "api_proto_library_internal")

licenses(["notice"]) # Apache 2

api_proto_library_internal(
name = "overload",
srcs = ["overload.proto"],
visibility = ["//visibility:public"],
)

api_go_proto_library(
name = "overload",
proto = ":overload",
)
23 changes: 16 additions & 7 deletions api/envoy/config/overload/v2alpha/overload.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,30 @@ import "google/protobuf/struct.proto";

import "validate/validate.proto";

// The Overload Manager provides an extensible framework to protect Envoy instances
// from overload of various resources (memory, cpu, file descriptors, etc)
// [#protodoc-title: Overload Manager]

message EmptyConfig {
}
// The Overload Manager provides an extensible framework to protect Envoy instances
// from overload of various resources (memory, cpu, file descriptors, etc).
// It monitors a configurable set of resources and notifies registered listeners
// when triggers related to those resources fire.

message ResourceMonitor {
// The name of the resource monitor to instantiate. Must match a registered
// resource monitor type.
// resource monitor type. The built-in resource monitors are:
//
// clang-format off
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this should be here :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without this clang-format suppression, it complains that the line is too long but fix-format splits the line like this:
// * :ref:envoy.resource_monitors.fixed_heap // <envoy_api_msg_config.resource_monitor.fixed_heap.v2alpha.FixedHeapConfig>

which breaks documentation:
/source/generated/rst/api-v2/config/overload/v2alpha/overload.proto.rst:32:Inline interpreted text or phrase reference start-string without end-string.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused how clang-format is running on RST or proto; I think we run our fix-format script on the protos, but I don't think Clang has any proto knowledge to contribute.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We run clang-format on protos. It has a proto mode.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed - realized I just needed to add a couple spaces to the ref comment so it could be parsed correctly by the docs generator.

// * :ref:`envoy.resource_monitors.fixed_heap <envoy_api_msg_config.resource_monitor.fixed_heap.v2alpha.FixedHeapConfig>`
// clang-format on
string name = 1 [(validate.rules).string.min_bytes = 1];

// Configuration for the resource monitor being instantiated.
google.protobuf.Struct config = 2;
}

// Convenience protobuf for resource monitors that do not require any configuration.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this? Can't we just use google.protobuf.Empty?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, added a helper EmptyConfigFactoryBase class for resource monitors with no configs that uses google.protobuf.Empty.

message EmptyConfig {
}

message ThresholdTrigger {
// If the resource pressure is greater than or equal to this value, the trigger
// will fire.
Expand All @@ -45,7 +54,7 @@ message OverloadAction {
// DNS to ensure uniqueness.
string name = 1 [(validate.rules).string.min_bytes = 1];

// A set of triggers for this action. If any of these triggers fires the overload action
// A set of triggers for this action. If any of these triggers fire the overload action
// is activated. Listeners are notified when the overload action transitions from
// inactivated to activated, or vice versa.
repeated Trigger triggers = 2 [(validate.rules).repeated .min_items = 1];
Expand All @@ -59,5 +68,5 @@ message OverloadManager {
repeated ResourceMonitor resource_monitors = 2 [(validate.rules).repeated .min_items = 1];

// The set of overload actions.
repeated OverloadAction actions = 3 [(validate.rules).repeated .min_items = 1];
repeated OverloadAction actions = 3;
}
1 change: 1 addition & 0 deletions api/envoy/config/resource_monitor/fixed_heap/v2alpha/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ licenses(["notice"]) # Apache 2
api_proto_library_internal(
name = "fixed_heap",
srcs = ["fixed_heap.proto"],
visibility = ["//visibility:public"],
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@ syntax = "proto3";
package envoy.config.resource_monitor.fixed_heap.v2alpha;
option go_package = "v2alpha";

// [#protodoc-title: Fixed heap]

// The fixed heap resource monitor reports the Envoy process memory pressure, computed as a
// fraction of currently reserved heap memory divided by a statically configured maximum
// specified in the FixedHeapConfig.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: remove blank line.

message FixedHeapConfig {
// Limit of the Envoy process heap size. This is used to calculate heap memory pressure which
// is defined as (current heap size)/max_heap_size_bytes.
uint64 max_heap_size_bytes = 1;
}
2 changes: 2 additions & 0 deletions docs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ PROTO_RST="
/envoy/config/filter/network/redis_proxy/v2/redis_proxy/envoy/config/filter/network/redis_proxy/v2/redis_proxy.proto.rst
/envoy/config/filter/network/tcp_proxy/v2/tcp_proxy/envoy/config/filter/network/tcp_proxy/v2/tcp_proxy.proto.rst
/envoy/config/health_checker/redis/v2/redis/envoy/config/health_checker/redis/v2/redis.proto.rst
/envoy/config/overload/v2alpha/overload/envoy/config/overload/v2alpha/overload.proto.rst
/envoy/config/rbac/v2alpha/rbac/envoy/config/rbac/v2alpha/rbac.proto.rst
/envoy/config/resource_monitor/fixed_heap/v2alpha/fixed_heap/envoy/config/resource_monitor/fixed_heap/v2alpha/fixed_heap.proto.rst
/envoy/config/transport_socket/capture/v2alpha/capture/envoy/config/transport_socket/capture/v2alpha/capture.proto.rst
/envoy/data/accesslog/v2/accesslog/envoy/data/accesslog/v2/accesslog.proto.rst
/envoy/data/core/v2alpha/health_check_event/envoy/data/core/v2alpha/health_check_event.proto.rst
Expand Down
1 change: 1 addition & 0 deletions docs/root/api-v2/bootstrap/bootstrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ Bootstrap
../config/bootstrap/v2/bootstrap.proto
../config/metrics/v2/stats.proto
../config/metrics/v2/metrics_service.proto
../config/overload/v2alpha/overload.proto
../config/ratelimit/v2/rls.proto
../config/trace/v2/trace.proto
1 change: 1 addition & 0 deletions docs/root/api-v2/config/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Extensions
rbac/rbac
health_checker/health_checker
transport_socket/transport_socket
resource_monitor/resource_monitor
10 changes: 10 additions & 0 deletions docs/root/api-v2/config/resource_monitor/resource_monitor.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
.. _config_resource_monitors:

Resource monitors
=================

.. toctree::
:glob:
:maxdepth: 1

*/v2alpha/*
1 change: 1 addition & 0 deletions docs/root/configuration/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ Configuration reference
runtime
statistics
tools/router_check
overload_manager/overload_manager
38 changes: 38 additions & 0 deletions docs/root/configuration/overload_manager/overload_manager.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
.. _config_overload_manager:

Overload manager
================

The overload manager is configured in the Bootstrap
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe link "overload manager" back to the architecture overview intro.

:ref:`overload_manager <envoy_api_field_config.bootstrap.v2.Bootstrap.overload_manager>`
field.

Resource monitors
-----------------

The overload manager uses Envoy's :ref:`extension <extending>` framework for defining
resource monitors. Envoy's builtin resource monitors are listed
:ref:`here <config_resource_monitors>`.

Statistics
----------

Each configured resource monitor has a statistics tree rooted at *overload.<name>.*
with the following statistics:

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

pressure, Gauge, Resource pressure as a percent
failed_updates, Counter, Total failed attempts to update the resource pressure
skipped_updates, Counter, Total skipped attempts to update the resource pressure due to a pending update

Each configured overload action has a statistics tree rooted at *overload.<name>.*
with the following statistics:

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

active, Gauge, "Active state of the action (0=inactive, 1=active)"
1 change: 1 addition & 0 deletions docs/root/intro/arch_overview/arch_overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ Architecture overview
draining
scripting
ext_authz_filter
overload_manager
14 changes: 14 additions & 0 deletions docs/root/intro/arch_overview/overload_manager.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.. _arch_overview_overload_manager:

Overload manager
================

The overload manager is an extensible component for protecting the Envoy server from overload
with respect to various system resources (such as memory, cpu or file descriptors) due to too
many client connections or requests. This is distinct from
:ref:`circuit breaking <arch_overview_circuit_break>` which is primarily aimed at protecting
upstream services.

The overload manager is :ref:`configured <config_overload_manager>` by specifying a set of
resources to monitor and a set of overload actions that will be taken when some of those
resources exceed certain pressure thresholds.
1 change: 1 addition & 0 deletions include/envoy/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ envoy_cc_library(
"//include/envoy/ratelimit:ratelimit_interface",
"//include/envoy/runtime:runtime_interface",
"//include/envoy/secret:secret_manager_interface",
"//include/envoy/server:overload_manager_interface",
"//include/envoy/ssl:context_manager_interface",
"//include/envoy/thread_local:thread_local_interface",
"//include/envoy/tracing:http_tracer_interface",
Expand Down
6 changes: 6 additions & 0 deletions include/envoy/server/instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "envoy/server/hot_restart.h"
#include "envoy/server/listener_manager.h"
#include "envoy/server/options.h"
#include "envoy/server/overload_manager.h"
#include "envoy/ssl/context_manager.h"
#include "envoy/thread_local/thread_local.h"
#include "envoy/tracing/http_tracer.h"
Expand Down Expand Up @@ -114,6 +115,11 @@ class Instance {
*/
virtual ListenerManager& listenerManager() PURE;

/**
* @return the server's overload manager.
*/
virtual OverloadManager& overloadManager() PURE;

/**
* @return the server's secret manager
*/
Expand Down
1 change: 1 addition & 0 deletions source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ envoy_cc_library(
"//source/common/stats:thread_local_store_lib",
"//source/common/upstream:cluster_manager_lib",
"//source/common/upstream:health_discovery_service_lib",
"//source/server:overload_manager_lib",
"//source/server/http:admin_lib",
"@envoy_api//envoy/config/bootstrap/v2:bootstrap_cc",
],
Expand Down
1 change: 1 addition & 0 deletions source/server/config_validation/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class ValidationInstance : Logger::Loggable<Logger::Id::main>,
void shutdown() override;
void shutdownAdmin() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Singleton::Manager& singletonManager() override { return *singleton_manager_; }
OverloadManager& overloadManager() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
bool healthCheckFailed() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Options& options() override { return options_; }
time_t startTimeCurrentEpoch() override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; }
Expand Down
3 changes: 3 additions & 0 deletions source/server/overload_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ OverloadManagerImpl::OverloadManagerImpl(
void OverloadManagerImpl::start() {
ASSERT(!started_);
started_ = true;
if (resources_.empty()) {
return;
}
timer_ = dispatcher_.createTimer([this]() -> void {
for (auto& resource : resources_) {
resource.second.update();
Expand Down
8 changes: 8 additions & 0 deletions source/server/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ void InstanceImpl::initialize(Options& options,

loadServerFlags(initial_config.flagsPath());

// Initialize the overload manager early so other modules can register for actions.
overload_manager_.reset(new OverloadManagerImpl(
dispatcher(), stats(),
bootstrap_.has_overload_manager() ? bootstrap_.overload_manager()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: you don't need the conditional here; if you write bootstrap_.overload_manager(), it will give you the empty proto if it hasn't been defined in the config.

: envoy::config::overload::v2alpha::OverloadManager()));

// Workers get created first so they register for thread local updates.
listener_manager_.reset(new ListenerManagerImpl(
*this, listener_component_factory_, worker_factory_, ProdSystemTimeSource::instance_));
Expand Down Expand Up @@ -403,6 +409,8 @@ RunHelper::RunHelper(Event::Dispatcher& dispatcher, Upstream::ClusterManager& cm
}

void InstanceImpl::run() {
overload_manager_->start();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there's an easy way to validate this in the server tests with the mocked overload manager, that could be nice to check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I moved this call to the RunHelper class so it can be tested.


RunHelper helper(*dispatcher_, clusterManager(), restarter_, access_log_manager_, init_manager_,
[this]() -> void { startWorkers(); });

Expand Down
3 changes: 3 additions & 0 deletions source/server/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "server/http/admin.h"
#include "server/init_manager_impl.h"
#include "server/listener_manager_impl.h"
#include "server/overload_manager_impl.h"
#include "server/test_hooks.h"
#include "server/worker_impl.h"

Expand Down Expand Up @@ -155,6 +156,7 @@ class InstanceImpl : Logger::Loggable<Logger::Id::main>, public Instance {
Init::Manager& initManager() override { return init_manager_; }
ListenerManager& listenerManager() override { return *listener_manager_; }
Secret::SecretManager& secretManager() override { return *secret_manager_; }
OverloadManager& overloadManager() override { return *overload_manager_; }
Runtime::RandomGenerator& random() override { return *random_generator_; }
RateLimit::ClientPtr
rateLimitClient(const absl::optional<std::chrono::milliseconds>& timeout) override {
Expand Down Expand Up @@ -222,6 +224,7 @@ class InstanceImpl : Logger::Loggable<Logger::Id::main>, public Instance {
SystemTime bootstrap_config_update_time_;
Grpc::AsyncClientManagerPtr async_client_manager_;
Upstream::HdsDelegatePtr hds_delegate_;
std::unique_ptr<OverloadManagerImpl> overload_manager_;
};

} // namespace Server
Expand Down
1 change: 1 addition & 0 deletions test/mocks/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ envoy_cc_mock(
"//include/envoy/server:health_checker_config_interface",
"//include/envoy/server:instance_interface",
"//include/envoy/server:options_interface",
"//include/envoy/server:overload_manager_interface",
"//include/envoy/server:worker_interface",
"//include/envoy/ssl:context_manager_interface",
"//include/envoy/upstream:health_checker_interface",
Expand Down
1 change: 1 addition & 0 deletions test/mocks/server/mocks.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ MockInstance::MockInstance()
ON_CALL(*this, initManager()).WillByDefault(ReturnRef(init_manager_));
ON_CALL(*this, listenerManager()).WillByDefault(ReturnRef(listener_manager_));
ON_CALL(*this, singletonManager()).WillByDefault(ReturnRef(*singleton_manager_));
ON_CALL(*this, overloadManager()).WillByDefault(ReturnRef(overload_manager_));
}

MockInstance::~MockInstance() {}
Expand Down
13 changes: 13 additions & 0 deletions test/mocks/server/mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "envoy/server/health_checker_config.h"
#include "envoy/server/instance.h"
#include "envoy/server/options.h"
#include "envoy/server/overload_manager.h"
#include "envoy/server/transport_socket_config.h"
#include "envoy/server/worker.h"
#include "envoy/ssl/context_manager.h"
Expand Down Expand Up @@ -270,6 +271,16 @@ class MockWorker : public Worker {
std::function<void()> remove_listener_completion_;
};

/**
 * Google Mock implementation of the OverloadManager interface for use in tests.
 * MockInstance owns a NiceMock of this type and returns it from overloadManager() by default.
 */
class MockOverloadManager : public OverloadManager {
public:
MockOverloadManager() {}
~MockOverloadManager() {}

// OverloadManager
// Mocked registration hook: takes the action name, a dispatcher and the callback to register.
// No default behavior is installed here; tests set expectations with EXPECT_CALL/ON_CALL.
MOCK_METHOD3(registerForAction, void(const std::string& action, Event::Dispatcher& dispatcher,
OverloadActionCb callback));
};

class MockInstance : public Instance {
public:
MockInstance();
Expand Down Expand Up @@ -298,6 +309,7 @@ class MockInstance : public Instance {
MOCK_METHOD0(initManager, Init::Manager&());
MOCK_METHOD0(listenerManager, ListenerManager&());
MOCK_METHOD0(options, Options&());
MOCK_METHOD0(overloadManager, OverloadManager&());
MOCK_METHOD0(random, Runtime::RandomGenerator&());
MOCK_METHOD0(rateLimitClient_, RateLimit::Client*());
MOCK_METHOD0(runtime, Runtime::Loader&());
Expand Down Expand Up @@ -333,6 +345,7 @@ class MockInstance : public Instance {
testing::NiceMock<LocalInfo::MockLocalInfo> local_info_;
testing::NiceMock<Init::MockManager> init_manager_;
testing::NiceMock<MockListenerManager> listener_manager_;
testing::NiceMock<MockOverloadManager> overload_manager_;
Singleton::ManagerPtr singleton_manager_;
};

Expand Down