From 4113566690dc3bbf9c912796c0a2c258b4265c4a Mon Sep 17 00:00:00 2001 From: qhu Date: Mon, 5 Feb 2024 01:45:29 +0000 Subject: [PATCH] [#20919] docdb: fix leaderless tablet endpoint for RF-1 setup Summary: For rf-1 setup, leader's ht lease reported to the master is constantly `InfiniteWatermarkForLocalPeer` (max of uint64_t). So condition `ht_lease_exp > existing_leader_lease_info->ht_lease_expiration` can only pass once at the first time the master received the metrics containing this tablet. Fix this condition to also cover `ht_lease_exp == existing_leader_lease_info->ht_lease_expiration`. Jira: DB-9900 Test Plan: MasterPathHandlersLeaderlessRF1ITest.TestRF1 MasterPathHandlersLeaderlessRF3ITest.* Reviewers: asrivastava Reviewed By: asrivastava Subscribers: bogdan, ybase Differential Revision: https://phorge.dev.yugabyte.com/D32180 --- .../master_path_handlers-itest.cc | 49 +++++++++++++++++-- src/yb/master/catalog_manager.cc | 2 +- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/yb/integration-tests/master_path_handlers-itest.cc b/src/yb/integration-tests/master_path_handlers-itest.cc index a68639731aa5..2590ae7da4d4 100644 --- a/src/yb/integration-tests/master_path_handlers-itest.cc +++ b/src/yb/integration-tests/master_path_handlers-itest.cc @@ -703,6 +703,8 @@ class MasterPathHandlersExternalItest : public MasterPathHandlersBaseItest 0 ? opts_.replication_factor : 3; + opts_.extra_master_flags.push_back(Format("--replication_factor=$0", rf)); MasterPathHandlersBaseItest::SetUp(); @@ -710,8 +712,14 @@ class MasterPathHandlersExternalItest : public MasterPathHandlersBaseItestGetMasterAddresses(), MonoDelta::FromSeconds(30)); ASSERT_OK(yb_admin_client_->Init()); - // 3 live replicas, one in each zone. - ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0:1,c.r.z1:1,c.r.z2:1", 3, + std::string placement_infos; + for (int i = 0; i < rf; i++) { + placement_infos += Format("c.r.z$0:1", i); + if (i < rf - 1) { + placement_infos += ","; + } + } + ASSERT_OK(yb_admin_client_->ModifyPlacementInfo(placement_infos, rf, kLivePlacementUuid)); } @@ -991,9 +999,18 @@ TEST_F_EX(MasterPathHandlersItest, TestTablePlacementInfo, MasterPathHandlersExt ASSERT_EQ(table_str.substr(pos + 22, 11), "anotherzone"); } +template class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest { + protected: + int num_tablet_servers() const override { + return RF; + } + int num_masters() const override { + return RF; + } public: void SetUp() override { + opts_.replication_factor = RF; opts_.extra_tserver_flags.push_back( {Format("--tserver_heartbeat_metrics_interval_ms=$0", kTserverHeartbeatMetricsIntervalMs)}); MasterPathHandlersExternalItest::SetUp(); @@ -1040,7 +1057,10 @@ class MasterPathHandlersLeaderlessITest : public MasterPathHandlersExternalItest static constexpr int kTserverHeartbeatMetricsIntervalMs = 1000; }; -TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderlessTabletEndpoint) { +typedef MasterPathHandlersLeaderlessITest<3> MasterPathHandlersLeaderlessRF3ITest; +typedef MasterPathHandlersLeaderlessITest<1> MasterPathHandlersLeaderlessRF1ITest; + +TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestLeaderlessTabletEndpoint) { ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs", "5")); CreateSingleTabletTestTable(); auto tablet_id = GetSingleTabletId(); @@ -1105,7 +1125,7 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderlessTabletEndpoint) { }, 20s * kTimeMultiplier, "leaderless tablet endpoint becomes empty")); } -TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderChange) { +TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestLeaderChange) { const auto kMaxLeaderLeaseExpiredMs = 5000; const auto kHtLeaseDurationMs = 2000; const auto kMaxTabletWithoutValidLeaderMs = 5000; @@ -1164,7 +1184,7 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestLeaderChange) { ASSERT_EQ(result.find(tablet_id), string::npos); } -TEST_F(MasterPathHandlersLeaderlessITest, TestAllFollowers) { +TEST_F(MasterPathHandlersLeaderlessRF3ITest, TestAllFollowers) { const auto kMaxTabletWithoutValidLeaderMs = 5000; ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs", std::to_string(kMaxTabletWithoutValidLeaderMs / 1000))); @@ -1209,6 +1229,25 @@ TEST_F(MasterPathHandlersLeaderlessITest, TestAllFollowers) { }, 20s * kTimeMultiplier, "leaderless tablet endpoint becomes empty")); } +TEST_F(MasterPathHandlersLeaderlessRF1ITest, TestRF1) { + const auto kLeaderlessTabletAlertDelaySecs = 5; + ASSERT_OK(cluster_->SetFlagOnMasters("leaderless_tablet_alert_delay_secs", + std::to_string(kLeaderlessTabletAlertDelaySecs))); + CreateSingleTabletTestTable(); + auto tablet_id = GetSingleTabletId(); + + SleepFor((kLeaderlessTabletAlertDelaySecs + 1) * 1s); + string result = GetLeaderlessTabletsString(); + ASSERT_EQ(result.find(tablet_id), string::npos); + + ASSERT_OK(cluster_->tablet_server(0)->Pause()); + SleepFor((kLeaderlessTabletAlertDelaySecs + 1) * 1s); + result = GetLeaderlessTabletsString(); + ASSERT_NE(result.find(tablet_id), string::npos); + + ASSERT_OK(cluster_->tablet_server(0)->Resume()); +} + TEST_F(MasterPathHandlersItest, TestLeaderlessDeletedTablet) { auto table = CreateTestTable(kNumTablets); const auto kLeaderlessTabletAlertDelaySecs = 5; diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index 99e5fcf98611..330556237db5 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -13213,7 +13213,7 @@ void CatalogManager::ProcessTabletMetadata( // If the reported ht lease from the leader is expired for more than // FLAGS_maximum_tablet_leader_lease_expired_secs, the leader shouldn't be treated // as a valid leader. - if (ht_lease_exp > existing_leader_lease_info->ht_lease_expiration && + if (ht_lease_exp >= existing_leader_lease_info->ht_lease_expiration && !IsHtLeaseExpiredForTooLong( master_->clock()->Now().GetPhysicalValueMicros(), ht_lease_exp)) { tablet->UpdateLastTimeWithValidLeader();