Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upstream: fix invalid access of ClusterMap iterator during warming cluster modification #8106

Merged
merged 3 commits into from
Sep 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions source/common/upstream/cluster_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -487,9 +487,22 @@ bool ClusterManagerImpl::addOrUpdateCluster(const envoy::api::v2::Cluster& clust

if (existing_active_cluster != active_clusters_.end() ||
existing_warming_cluster != warming_clusters_.end()) {
// The following init manager remove call is a NOP in the case we are already initialized. It's
// just kept here to avoid additional logic.
init_helper_.removeCluster(*existing_active_cluster->second->cluster_);
if (existing_active_cluster != active_clusters_.end()) {
// The following init manager remove call is a NOP in the case we are already initialized.
// It's just kept here to avoid additional logic.
init_helper_.removeCluster(*existing_active_cluster->second->cluster_);
} else {
// Validate that warming clusters are not added to the init_helper_.
// NOTE: This loop is compiled out in optimized builds.
for (const std::list<Cluster*>& cluster_list :
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: should we #ifdef to avoid this iteration in non-debug mode?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to avoid polluting the code with an #ifdef guard since the compiler optimizes away this for loop. I just confirmed by checking the disassembler output with a -c opt build.

Added a comment to clarify.

{std::cref(init_helper_.primary_init_clusters_),
std::cref(init_helper_.secondary_init_clusters_)}) {
ASSERT(!std::any_of(cluster_list.begin(), cluster_list.end(),
[&existing_warming_cluster](Cluster* cluster) {
return existing_warming_cluster->second->cluster_.get() == cluster;
}));
}
}
cm_stats_.cluster_modified_.inc();
} else {
cm_stats_.cluster_added_.inc();
Expand Down
6 changes: 6 additions & 0 deletions source/common/upstream/cluster_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ class ProdClusterManagerFactory : public ClusterManagerFactory {
Singleton::Manager& singleton_manager_;
};

// For friend declaration in ClusterManagerInitHelper.
class ClusterManagerImpl;

/**
* This is a helper class used during cluster management initialization. Dealing with primary
* clusters, secondary clusters, and CDS, is quite complicated, so this makes it easier to test.
Expand Down Expand Up @@ -129,6 +132,9 @@ class ClusterManagerInitHelper : Logger::Loggable<Logger::Id::upstream> {
State state() const { return state_; }

private:
// To enable invariant assertions on the cluster lists.
friend ClusterManagerImpl;

void initializeSecondaryClusters();
void maybeFinishInitialize();
void onClusterInit(Cluster& cluster);
Expand Down
72 changes: 72 additions & 0 deletions test/common/upstream/cluster_manager_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,78 @@ TEST_F(ClusterManagerImplTest, RemoveWarmingCluster) {
EXPECT_TRUE(Mock::VerifyAndClearExpectations(cluster1.get()));
}

TEST_F(ClusterManagerImplTest, ModifyWarmingCluster) {
time_system_.setSystemTime(std::chrono::milliseconds(1234567891234));
create(defaultConfig());

InSequence s;
ReadyWatcher initialized;
EXPECT_CALL(initialized, ready());
cluster_manager_->setInitializedCb([&]() -> void { initialized.ready(); });

// Add a "fake_cluster" in warming state.
std::shared_ptr<MockClusterMockPrioritySet> cluster1 =
std::make_shared<NiceMock<MockClusterMockPrioritySet>>();
EXPECT_CALL(factory_, clusterFromProto_(_, _, _, _))
.WillOnce(Return(std::make_pair(cluster1, nullptr)));
EXPECT_CALL(*cluster1, initializePhase()).Times(0);
EXPECT_CALL(*cluster1, initialize(_));
EXPECT_TRUE(
cluster_manager_->addOrUpdateCluster(defaultStaticCluster("fake_cluster"), "version1"));
checkStats(1 /*added*/, 0 /*modified*/, 0 /*removed*/, 0 /*active*/, 1 /*warming*/);
EXPECT_EQ(nullptr, cluster_manager_->get("fake_cluster"));
checkConfigDump(R"EOF(
dynamic_warming_clusters:
- version_info: "version1"
cluster:
name: "fake_cluster"
type: STATIC
connect_timeout: 0.25s
hosts:
- socket_address:
address: "127.0.0.1"
port_value: 11001
last_updated:
seconds: 1234567891
nanos: 234000000
)EOF");

// Update the warming cluster that was just added.
std::shared_ptr<MockClusterMockPrioritySet> cluster2 =
std::make_shared<NiceMock<MockClusterMockPrioritySet>>();
EXPECT_CALL(factory_, clusterFromProto_(_, _, _, _))
.WillOnce(Return(std::make_pair(cluster2, nullptr)));
EXPECT_CALL(*cluster2, initializePhase()).Times(0);
EXPECT_CALL(*cluster2, initialize(_));
EXPECT_TRUE(cluster_manager_->addOrUpdateCluster(
parseClusterFromV2Json(fmt::sprintf(kDefaultStaticClusterTmpl, "fake_cluster",
R"EOF(
"socket_address": {
"address": "127.0.0.1",
"port_value": 11002
})EOF")),
"version2"));
checkStats(1 /*added*/, 1 /*modified*/, 0 /*removed*/, 0 /*active*/, 1 /*warming*/);
checkConfigDump(R"EOF(
dynamic_warming_clusters:
- version_info: "version2"
cluster:
name: "fake_cluster"
type: STATIC
connect_timeout: 0.25s
hosts:
- socket_address:
address: "127.0.0.1"
port_value: 11002
last_updated:
seconds: 1234567891
nanos: 234000000
)EOF");

EXPECT_TRUE(Mock::VerifyAndClearExpectations(cluster1.get()));
EXPECT_TRUE(Mock::VerifyAndClearExpectations(cluster2.get()));
}

// Verify that shutting down the cluster manager destroys warming clusters.
TEST_F(ClusterManagerImplTest, ShutdownWithWarming) {
create(defaultConfig());
Expand Down
18 changes: 10 additions & 8 deletions test/common/upstream/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,26 @@ namespace Envoy {
namespace Upstream {
namespace {

inline std::string defaultStaticClusterJson(const std::string& name) {
return fmt::sprintf(R"EOF(
constexpr static const char* kDefaultStaticClusterTmpl = R"EOF(
{
"name": "%s",
"connect_timeout": "0.250s",
"type": "static",
"lb_policy": "round_robin",
"hosts": [
{
"socket_address": {
"address": "127.0.0.1",
"port_value": 11001
}
%s,
}
]
}
)EOF",
name);
)EOF";

inline std::string defaultStaticClusterJson(const std::string& name) {
return fmt::sprintf(kDefaultStaticClusterTmpl, name, R"EOF(
"socket_address": {
"address": "127.0.0.1",
"port_value": 11001
})EOF");
}

inline envoy::config::bootstrap::v2::Bootstrap
Expand Down