Skip to content

Commit

Permalink
[BACKPORT 2.14][#20919] docdb: fix leaderless tablet endpoint for RF-…
Browse files Browse the repository at this point in the history
…1 setup

Summary:
Original commit: 4113566 / D32180
For rf-1 setup, leader's ht lease reported to the master is constantly `InfiniteWatermarkForLocalPeer` (max of uint64_t).
So condition `ht_lease_exp > existing_leader_lease_info->ht_lease_expiration` can only pass once at the first time the master received the metrics containing this tablet.

Fix this condition to also cover `ht_lease_exp == existing_leader_lease_info->ht_lease_expiration`.
Jira: DB-9900

Test Plan:
MasterPathHandlersLeaderlessRF1ITest.TestRF1
MasterPathHandlersLeaderlessRF3ITest.*

Reviewers: asrivastava

Reviewed By: asrivastava

Subscribers: bogdan, ybase

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D32473
  • Loading branch information
Huqicheng committed Mar 8, 2024
1 parent eafea03 commit 10f365f
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 9 deletions.
6 changes: 5 additions & 1 deletion src/yb/integration-tests/external_mini_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ Status ExternalMiniCluster::Start(rpc::Messenger* messenger) {
CHECK(tablet_servers_.empty()) << "Tablet servers are not empty (size: "
<< tablet_servers_.size() << "). Maybe you meant Restart()?";
RETURN_NOT_OK(HandleOptions());
FLAGS_replication_factor = narrow_cast<int>(opts_.num_masters);
FLAGS_replication_factor =
opts_.replication_factor > 0 ? opts_.replication_factor : narrow_cast<int>(opts_.num_masters);

if (messenger == nullptr) {
rpc::MessengerBuilder builder("minicluster-messenger");
Expand Down Expand Up @@ -1214,6 +1215,9 @@ Status ExternalMiniCluster::StartMasters() {
// Disable WAL fsync for tests
flags.push_back("--durable_wal_write=false");
flags.push_back("--enable_leader_failure_detection=true");
if (opts_.replication_factor > 0) {
flags.push_back(Format("--replication_factor=$0", opts_.replication_factor));
}
// Limit number of transaction table tablets to help avoid timeouts.
int num_transaction_table_tablets = NumTabletsPerTransactionTable(opts_);
flags.push_back(Substitute("--transaction_table_num_tablets=$0", num_transaction_table_tablets));
Expand Down
4 changes: 4 additions & 0 deletions src/yb/integration-tests/external_mini_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ struct ExternalMiniClusterOptions {
// set to a non-zero value, this value is used instead.
int transaction_table_num_tablets = 0;

// Specifies the replication factor for the cluster. If this is not set, default to the number
// of masters in the cluster.
int replication_factor = 0;

Status RemovePort(const uint16_t port);
Status AddPort(const uint16_t port);

Expand Down
57 changes: 50 additions & 7 deletions src/yb/integration-tests/master_path_handlers-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ class MasterPathHandlersBaseItest : public YBMiniClusterTestBase<T> {
return kNumMasters;
}

virtual int num_tablet_servers() const {
return kNumTservers;
}

std::shared_ptr<client::YBTable> CreateTestTable(const int num_tablets = 0) {
auto client = CHECK_RESULT(cluster_->CreateClient());
CHECK_OK(client->CreateNamespaceIfNotExists(kKeyspaceName));
Expand Down Expand Up @@ -167,7 +171,7 @@ class MasterPathHandlersItest : public MasterPathHandlersBaseItest<MiniCluster>
MiniClusterOptions opts;
// Set low heartbeat timeout.
ANNOTATE_UNPROTECTED_WRITE(FLAGS_tserver_unresponsive_timeout_ms) = 5000;
opts.num_tablet_servers = kNumTservers;
opts.num_tablet_servers = num_tablet_servers();
opts.num_masters = num_masters();
cluster_.reset(new MiniCluster(opts));
ASSERT_OK(cluster_->Start());
Expand Down Expand Up @@ -338,22 +342,30 @@ class MasterPathHandlersExternalItest : public MasterPathHandlersBaseItest<Exter
ExternalMiniClusterOptions opts_;

void SetUp() override {
opts_.num_tablet_servers = kNumTservers;
opts_.num_tablet_servers = num_tablet_servers();
opts_.num_masters = num_masters();
opts_.extra_tserver_flags.push_back("--placement_cloud=c");
opts_.extra_tserver_flags.push_back("--placement_region=r");
opts_.extra_tserver_flags.push_back("--placement_zone=z${index}");
opts_.extra_tserver_flags.push_back("--placement_uuid=" + kLivePlacementUuid);
opts_.extra_tserver_flags.push_back("--follower_unavailable_considered_failed_sec=10");
const auto rf = opts_.replication_factor > 0 ? opts_.replication_factor : 3;
opts_.extra_master_flags.push_back(Format("--replication_factor=$0", rf));

MasterPathHandlersBaseItest<ExternalMiniCluster>::SetUp();

yb_admin_client_ = std::make_unique<tools::ClusterAdminClient>(
cluster_->GetMasterAddresses(), MonoDelta::FromSeconds(30));
ASSERT_OK(yb_admin_client_->Init());

// 3 live replicas, one in each zone.
ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0:1,c.r.z1:1,c.r.z2:1", 3,
std::string placement_infos;
for (int i = 0; i < rf; i++) {
placement_infos += Format("c.r.z$0:1", i);
if (i < rf - 1) {
placement_infos += ",";
}
}
ASSERT_OK(yb_admin_client_->ModifyPlacementInfo(placement_infos, rf,
kLivePlacementUuid));
}

Expand Down Expand Up @@ -615,9 +627,18 @@ TEST_F_EX(MasterPathHandlersItest, TestTablePlacementInfo, MasterPathHandlersExt
ASSERT_EQ(table_str.substr(pos + 22, 11), "anotherzone");
}

template <int RF>
class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest {
protected:
int num_tablet_servers() const override {
return RF;
}
int num_masters() const override {
return RF;
}
public:
void SetUp() override {
opts_.replication_factor = RF;
opts_.extra_tserver_flags.push_back(
{Format("--tserver_heartbeat_metrics_interval_ms=$0", kTserverHeartbeatMetricsIntervalMs)});
MasterPathHandlersExternalItest::SetUp();
Expand Down Expand Up @@ -664,7 +685,10 @@ class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest
static constexpr int kTserverHeartbeatMetricsIntervalMs = 1000;
};

TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderlessTabletEndpoint) {
typedef MasterPathHandlersLeaderlessITest<3> MasterPathHandlersLeaderlessRF3ITest;
typedef MasterPathHandlersLeaderlessITest<1> MasterPathHandlersLeaderlessRF1ITest;

TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestLeaderlessTabletEndpoint) {
ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs", "5"));
CreateSingleTabletTestTable();
auto tablet_id = GetSingleTabletId();
Expand Down Expand Up @@ -729,7 +753,7 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderlessTabletEndpoint) {
}, 20s * kTimeMultiplier, "leaderless tablet endpoint becomes empty"));
}

TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderChange) {
TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestLeaderChange) {
const auto kMaxLeaderLeaseExpiredMs = 5000;
const auto kHtLeaseDurationMs = 2000;
const auto kMaxTabletWithoutValidLeaderMs = 5000;
Expand Down Expand Up @@ -788,7 +812,7 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderChange) {
ASSERT_EQ(result.find(tablet_id), string::npos);
}

TEST_F(MasterPathHandlersLeaderlessITest, TestAllFollowers) {
TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestAllFollowers) {
const auto kMaxTabletWithoutValidLeaderMs = 5000;
ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs",
std::to_string(kMaxTabletWithoutValidLeaderMs / 1000)));
Expand Down Expand Up @@ -833,6 +857,25 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestAllFollowers) {
}, 20s * kTimeMultiplier, "leaderless tablet endpoint becomes empty"));
}

TEST_F(MasterPathHandlersLeaderlessRF1ITest, TestRF1) {
const auto kLeaderlessTabletAlertDelaySecs = 5;
ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs",
std::to_string(kLeaderlessTabletAlertDelaySecs)));
CreateSingleTabletTestTable();
auto tablet_id = GetSingleTabletId();

SleepFor((kLeaderlessTabletAlertDelaySecs + 1) * 1s);
string result = GetLeaderlessTabletsString();
ASSERT_EQ(result.find(tablet_id), string::npos);

ASSERT_OK(cluster_->tablet_server(0)->Pause());
SleepFor((kLeaderlessTabletAlertDelaySecs + 1) * 1s);
result = GetLeaderlessTabletsString();
ASSERT_NE(result.find(tablet_id), string::npos);

ASSERT_OK(cluster_->tablet_server(0)->Resume());
}

TEST_F(MasterPathHandlersItest, TestLeaderlessDeletedTablet) {
auto table = CreateTestTable(kNumTablets);
const auto kLeaderlessTabletAlertDelaySecs = 5;
Expand Down
2 changes: 1 addition & 1 deletion src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11919,7 +11919,7 @@ void CatalogManager::ProcessTabletMetadata(
// If the reported ht lease from the leader is expired for more than
// FLAGS_maximum_tablet_leader_lease_expired_secs, the leader shouldn't be treated
// as a valid leader.
if (ht_lease_exp > existing_leader_lease_info->ht_lease_expiration &&
if (ht_lease_exp >= existing_leader_lease_info->ht_lease_expiration &&
!IsHtLeaseExpiredForTooLong(
master_->clock()->Now().GetPhysicalValueMicros(), ht_lease_exp)) {
tablet->UpdateLastTimeWithValidLeader();
Expand Down

0 comments on commit 10f365f

Please sign in to comment.