Skip to content

Commit

Permalink
health_checker: wait for secrets to load before starting heatlth checks
Browse files Browse the repository at this point in the history
This commit is a revival of
envoyproxy#13516 which aimed to solve
envoyproxy#12389. That PR was merged and
then reverted due to a deadlock bug, which is still present in this
commit.

The fix will be applied in a subsequent commit in order to make the
resolution clearer to PR reviewers.

Signed-off-by: Michael Puncel <mpuncel@squareup.com>
  • Loading branch information
mpuncel committed Apr 29, 2021
1 parent 152762a commit e98cbec
Show file tree
Hide file tree
Showing 21 changed files with 244 additions and 6 deletions.
3 changes: 3 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ Bug Fixes
* validation: fix an issue that causes TAP sockets to panic during config validation mode.
* xray: fix the default sampling 'rate' for AWS X-Ray tracer extension to be 5% as opposed to 50%.
* zipkin: fix timestamp serializaiton in annotations. A prior bug fix exposed an issue with timestamps being serialized as strings.
* active health checks: health checks using a TLS transport socket and secrets delivered via :ref:`SDS <config_secret_discovery_service>` will now wait until secrets are loaded before the first health check attempt. This should improve startup times by not having to wait for the :ref:`no_traffic_interval <envoy_v3_api_field_config.core.v3.HealthCheck.no_traffic_interval>` until the next attempt.

* http: sending CONNECT_ERROR for HTTP/2 where appropriate during CONNECT requests.

Removed Config or Runtime
-------------------------
Expand Down
7 changes: 7 additions & 0 deletions include/envoy/network/transport_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ class TransportSocketFactory {
* negotiation.
*/
virtual bool supportsAlpn() const { return false; }

/**
* @param a callback to be invoked when the secrets required by the created transport
* sockets are ready. Will be invoked immediately if no secrets are required or if they
* are already loaded.
*/
virtual void addReadyCb(std::function<void()> callback) PURE;
};

using TransportSocketFactoryPtr = std::unique_ptr<TransportSocketFactory>;
Expand Down
9 changes: 9 additions & 0 deletions include/envoy/upstream/upstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ class Host : virtual public HostDescription {
Network::TransportSocketOptionsSharedPtr transport_socket_options,
const envoy::config::core::v3::Metadata* metadata) const PURE;

/**
* Register a callback to be invoked when secrets are ready for the transport socket that
* corresponds to the provided metadata.
* @param callback supplies the callback to be invoked.
* @param metadata supplies the metadata to be used for resolving transport socket matches.
*/
virtual void addReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const PURE;

/**
* @return host specific gauges.
*/
Expand Down
1 change: 1 addition & 0 deletions source/common/network/raw_buffer_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class RawBufferSocketFactory : public TransportSocketFactory {
TransportSocketPtr createTransportSocket(TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return false; }
void addReadyCb(std::function<void()> callback) override { callback(); }
};

} // namespace Network
Expand Down
1 change: 1 addition & 0 deletions source/common/quic/quic_transport_socket_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class QuicTransportSocketFactoryBase : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override { return true; }
bool usesProxyProtocolOptions() const override { return false; }
bool supportsAlpn() const override { return true; }
void addReadyCb(std::function<void()> callback) override { callback(); }

protected:
virtual void onSecretUpdated() PURE;
Expand Down
8 changes: 8 additions & 0 deletions source/common/upstream/health_checker_base_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,14 @@ void HealthCheckerImplBase::ActiveHealthCheckSession::onTimeoutBase() {
handleFailure(envoy::data::core::v3::NETWORK_TIMEOUT);
}

void HealthCheckerImplBase::ActiveHealthCheckSession::start() {
// Start health checks only after secrets are ready for the transport socket
// that health checks will be performed on. If health checks start
// immediately, they may fail with "network" errors due to TLS credentials
// not yet being loaded, which can result in long startup times.
host_->addReadyCb([this] { onInitialInterval(); }, parent_.transportSocketMatchMetadata().get());
}

void HealthCheckerImplBase::ActiveHealthCheckSession::onInitialInterval() {
if (parent_.initial_jitter_.count() == 0) {
onIntervalBase();
Expand Down
2 changes: 1 addition & 1 deletion source/common/upstream/health_checker_base_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class HealthCheckerImplBase : public HealthChecker,
~ActiveHealthCheckSession() override;
HealthTransition setUnhealthy(envoy::data::core::v3::HealthCheckFailureType type);
void onDeferredDeleteBase();
void start() { onInitialInterval(); }
void start();

protected:
ActiveHealthCheckSession(HealthCheckerImplBase& parent, HostSharedPtr host);
Expand Down
8 changes: 8 additions & 0 deletions source/common/upstream/upstream_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,14 @@ HostImpl::createConnection(Event::Dispatcher& dispatcher, const ClusterInfo& clu
return connection;
}

void HostImpl::addReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const {
Network::TransportSocketFactory& factory =
(metadata != nullptr) ? resolveTransportSocketFactory(healthCheckAddress(), metadata)
: transportSocketFactory();
factory.addReadyCb(callback);
}

void HostImpl::weight(uint32_t new_weight) { weight_ = std::max(1U, new_weight); }

std::vector<HostsPerLocalityConstSharedPtr> HostsPerLocalityImpl::filter(
Expand Down
2 changes: 2 additions & 0 deletions source/common/upstream/upstream_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ class HostImpl : public HostDescriptionImpl,
createHealthCheckConnection(Event::Dispatcher& dispatcher,
Network::TransportSocketOptionsSharedPtr transport_socket_options,
const envoy::config::core::v3::Metadata* metadata) const override;
void addReadyCb(std::function<void()> callback,
const envoy::config::core::v3::Metadata* metadata) const override;

std::vector<std::pair<absl::string_view, Stats::PrimitiveGaugeReference>>
gauges() const override {
Expand Down
3 changes: 3 additions & 0 deletions source/extensions/transport_sockets/alts/tsi_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class TsiSocketFactory : public Network::TransportSocketFactory {
Network::TransportSocketPtr
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;

// TODO(mpuncel) only invoke callback() once secrets are ready.
void addReadyCb(std::function<void()> callback) override { callback(); };

private:
HandshakerFactory handshaker_factory_;
HandshakeValidator handshake_validator_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class UpstreamProxyProtocolSocketFactory : public Network::TransportSocketFactor
createTransportSocket(Network::TransportSocketOptionsSharedPtr options) const override;
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return true; }
void addReadyCb(std::function<void()> callback) override { callback(); };

private:
Network::TransportSocketFactoryPtr transport_socket_factory_;
Expand Down
3 changes: 3 additions & 0 deletions source/extensions/transport_sockets/tap/tap.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class TapSocketFactory : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override;

// TODO(mpuncel) only invoke callback() once secrets are ready.
void addReadyCb(std::function<void()> callback) override { callback(); };

private:
Network::TransportSocketFactoryPtr transport_socket_factory_;
};
Expand Down
52 changes: 52 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,13 +380,39 @@ bool ClientSslSocketFactory::implementsSecureTransport() const { return true; }

void ClientSslSocketFactory::onAddOrUpdateSecret() {
ENVOY_LOG(debug, "Secret is updated.");
bool should_run_callbacks = false;
{
absl::WriterMutexLock l(&ssl_ctx_mu_);
ssl_ctx_ = manager_.createSslClientContext(stats_scope_, *config_, ssl_ctx_);
if (ssl_ctx_) {
should_run_callbacks = true;
}
}
if (should_run_callbacks) {
for (const auto& cb : secrets_ready_callbacks_) {
cb();
}
secrets_ready_callbacks_.clear();
}
stats_.ssl_context_update_by_sds_.inc();
}

void ClientSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
}
}

if (immediately_run_callback) {
callback();
} else {
secrets_ready_callbacks_.push_back(callback);
}
}

ServerSslSocketFactory::ServerSslSocketFactory(Envoy::Ssl::ServerContextConfigPtr config,
Envoy::Ssl::ContextManager& manager,
Stats::Scope& stats_scope,
Expand Down Expand Up @@ -421,13 +447,39 @@ bool ServerSslSocketFactory::implementsSecureTransport() const { return true; }

void ServerSslSocketFactory::onAddOrUpdateSecret() {
ENVOY_LOG(debug, "Secret is updated.");
bool should_run_callbacks = false;
{
absl::WriterMutexLock l(&ssl_ctx_mu_);
ssl_ctx_ = manager_.createSslServerContext(stats_scope_, *config_, server_names_, ssl_ctx_);

if (ssl_ctx_) {
should_run_callbacks = true;
}
}
if (should_run_callbacks) {
for (const auto& cb : secrets_ready_callbacks_) {
cb();
}
secrets_ready_callbacks_.clear();
}
stats_.ssl_context_update_by_sds_.inc();
}

void ServerSslSocketFactory::addReadyCb(std::function<void()> callback) {
bool immediately_run_callback = false;
{
absl::ReaderMutexLock l(&ssl_ctx_mu_);
if (ssl_ctx_) {
immediately_run_callback = true;
}
}
if (immediately_run_callback) {
callback();
} else {
secrets_ready_callbacks_.push_back(callback);
}
}

} // namespace Tls
} // namespace TransportSockets
} // namespace Extensions
Expand Down
6 changes: 6 additions & 0 deletions source/extensions/transport_sockets/tls/ssl_socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ class ClientSslSocketFactory : public Network::TransportSocketFactory,
bool usesProxyProtocolOptions() const override { return false; }
bool supportsAlpn() const override { return true; }

void addReadyCb(std::function<void()> callback) override;

// Secret::SecretCallbacks
void onAddOrUpdateSecret() override;

Expand All @@ -125,6 +127,7 @@ class ClientSslSocketFactory : public Network::TransportSocketFactory,
Envoy::Ssl::ClientContextConfigPtr config_;
mutable absl::Mutex ssl_ctx_mu_;
Envoy::Ssl::ClientContextSharedPtr ssl_ctx_ ABSL_GUARDED_BY(ssl_ctx_mu_);
std::list<std::function<void()>> secrets_ready_callbacks_;
};

class ServerSslSocketFactory : public Network::TransportSocketFactory,
Expand All @@ -140,6 +143,8 @@ class ServerSslSocketFactory : public Network::TransportSocketFactory,
bool implementsSecureTransport() const override;
bool usesProxyProtocolOptions() const override { return false; }

void addReadyCb(std::function<void()> callback) override;

// Secret::SecretCallbacks
void onAddOrUpdateSecret() override;

Expand All @@ -151,6 +156,7 @@ class ServerSslSocketFactory : public Network::TransportSocketFactory,
const std::vector<std::string> server_names_;
mutable absl::Mutex ssl_ctx_mu_;
Envoy::Ssl::ServerContextSharedPtr ssl_ctx_ ABSL_GUARDED_BY(ssl_ctx_mu_);
std::list<std::function<void()>> secrets_ready_callbacks_;
};

} // namespace Tls
Expand Down
21 changes: 16 additions & 5 deletions test/common/upstream/health_checker_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,8 @@ TEST_F(HttpHealthCheckerImplTest, TlsOptions) {
Network::TransportSocketFactoryPtr(socket_factory));
cluster_->info_->transport_socket_matcher_.reset(transport_socket_match);

EXPECT_CALL(*socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
EXPECT_CALL(*socket_factory, createTransportSocket(ApplicationProtocolListEq("http1")));

allocHealthChecker(yaml);
Expand Down Expand Up @@ -2582,13 +2584,19 @@ TEST_F(HttpHealthCheckerImplTest, TransportSocketMatchCriteria) {
ALL_TRANSPORT_SOCKET_MATCH_STATS(POOL_COUNTER_PREFIX(stats_store, "test"))};
auto health_check_only_socket_factory = std::make_unique<Network::MockTransportSocketFactory>();

// We expect resolve() to be called twice, once for endpoint socket matching (with no metadata in
// this test) and once for health check socket matching. In the latter we expect metadata that
// matches the above object.
// We expect resolve() to be called 3 times, once for endpoint socket matching (with no metadata
// in this test) and twice for health check socket matching (once for checking if secrets are
// ready on the transport socket, and again for actually getting the health check transport socket
// to create a connection). In the latter 2 calls, we expect metadata that matches the above
// object.
EXPECT_CALL(*transport_socket_match, resolve(nullptr));
EXPECT_CALL(*transport_socket_match, resolve(MetadataEq(metadata)))
.WillOnce(Return(TransportSocketMatcher::MatchData(
*health_check_only_socket_factory, health_transport_socket_stats, "health_check_only")));
.Times(2)
.WillRepeatedly(Return(TransportSocketMatcher::MatchData(
*health_check_only_socket_factory, health_transport_socket_stats, "health_check_only")))
.RetiresOnSaturation();
EXPECT_CALL(*health_check_only_socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
// The health_check_only_socket_factory should be used to create a transport socket for the health
// check connection.
EXPECT_CALL(*health_check_only_socket_factory, createTransportSocket(_));
Expand Down Expand Up @@ -2624,6 +2632,9 @@ TEST_F(HttpHealthCheckerImplTest, NoTransportSocketMatchCriteria) {
)EOF";

auto default_socket_factory = std::make_unique<Network::MockTransportSocketFactory>();

EXPECT_CALL(*default_socket_factory, addReadyCb(_))
.WillOnce(Invoke([&](std::function<void()> callback) -> void { callback(); }));
// The default_socket_factory should be used to create a transport socket for the health check
// connection.
EXPECT_CALL(*default_socket_factory, createTransportSocket(_));
Expand Down
2 changes: 2 additions & 0 deletions test/common/upstream/transport_socket_matcher_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class FakeTransportSocketFactory : public Network::TransportSocketFactory {
MOCK_METHOD(bool, usesProxyProtocolOptions, (), (const));
MOCK_METHOD(Network::TransportSocketPtr, createTransportSocket,
(Network::TransportSocketOptionsSharedPtr), (const));
MOCK_METHOD(void, addReadyCb, (std::function<void()>));
FakeTransportSocketFactory(std::string id) : id_(std::move(id)) {}
std::string id() const { return id_; }

Expand All @@ -50,6 +51,7 @@ class FooTransportSocketFactory
MOCK_METHOD(bool, usesProxyProtocolOptions, (), (const));
MOCK_METHOD(Network::TransportSocketPtr, createTransportSocket,
(Network::TransportSocketOptionsSharedPtr), (const));
MOCK_METHOD(void, addReadyCb, (std::function<void()>));

Network::TransportSocketFactoryPtr
createTransportSocketFactory(const Protobuf::Message& proto,
Expand Down
Loading

0 comments on commit e98cbec

Please sign in to comment.