diff --git a/src/yb/integration-tests/tablet-split-itest-base.cc b/src/yb/integration-tests/tablet-split-itest-base.cc index 273c5ab7c296..269448b122fc 100644 --- a/src/yb/integration-tests/tablet-split-itest-base.cc +++ b/src/yb/integration-tests/tablet-split-itest-base.cc @@ -1012,7 +1012,7 @@ Status TabletSplitExternalMiniClusterITest::SplitTabletCrashMaster( RETURN_NOT_OK(FlushTestTable()); // Split tablet should crash before creating either tablet - RETURN_NOT_OK(cluster_->SetFlagOnMasters("TEST_crash_after_creating_single_split_tablet", "1.0")); + RETURN_NOT_OK(cluster_->SetFlagOnMasters("TEST_crash_after_registering_split_tablets", "1.0")); // Retrieve split key from a leader peer if (split_partition_key) { @@ -1027,7 +1027,7 @@ Status TabletSplitExternalMiniClusterITest::SplitTabletCrashMaster( } RETURN_NOT_OK(RestartAllMasters(cluster_.get())); - RETURN_NOT_OK(cluster_->SetFlagOnMasters("TEST_crash_after_creating_single_split_tablet", "0.0")); + RETURN_NOT_OK(cluster_->SetFlagOnMasters("TEST_crash_after_registering_split_tablets", "0.0")); if (change_split_boundary) { RETURN_NOT_OK(WriteRows(kNumRows * 2, kNumRows)); diff --git a/src/yb/integration-tests/tablet-split-itest.cc b/src/yb/integration-tests/tablet-split-itest.cc index d999f6d93982..9f9ebc80c778 100644 --- a/src/yb/integration-tests/tablet-split-itest.cc +++ b/src/yb/integration-tests/tablet-split-itest.cc @@ -161,7 +161,7 @@ DECLARE_uint64(prevent_split_for_ttl_tables_for_seconds); DECLARE_bool(sort_automatic_tablet_splitting_candidates); DECLARE_int32(intents_flush_max_delay_ms); DECLARE_int32(index_block_restart_interval); -DECLARE_bool(TEST_error_after_creating_single_split_tablet); +DECLARE_bool(TEST_error_after_registering_split_tablets); DECLARE_bool(TEST_pause_before_send_hinted_election); DECLARE_bool(TEST_skip_election_when_fail_detected); DECLARE_int32(scheduled_full_compaction_frequency_hours); @@ -3255,7 +3255,7 @@ class TabletSplitSingleServerITestWithPartition : TEST_P(TabletSplitSingleServerITestWithPartition, TestSplitEncodedKeyAfterBreakInTheMiddleOfSplit) { // Make catalog manager to do only Upsert in order to emulate master error/crash behaviour in the // middle of split. Restart is required to be sure the flags change is seen at the master's side. - ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_error_after_creating_single_split_tablet) = true; + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_error_after_registering_split_tablets) = true; ASSERT_OK(cluster_->RestartSync()); SetNumTablets(1); @@ -3285,7 +3285,7 @@ TEST_P(TabletSplitSingleServerITestWithPartition, TestSplitEncodedKeyAfterBreakI ASSERT_NOK(status) << "Corresponding split is expected to fail!"; // Reset flag to emulate partitions re-calculation. - ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_error_after_creating_single_split_tablet) = false; + ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_error_after_registering_split_tablets) = false; ASSERT_OK(cluster_->RestartSync()); // Split should pass without any error. diff --git a/src/yb/master/catalog_manager.cc b/src/yb/master/catalog_manager.cc index 42062fcfa31c..176f620161fa 100644 --- a/src/yb/master/catalog_manager.cc +++ b/src/yb/master/catalog_manager.cc @@ -460,12 +460,12 @@ DEFINE_test_flag(int32, slowdown_alter_table_rpcs_ms, 0, DEFINE_test_flag(bool, reject_delete_not_serving_tablet_rpc, false, "Whether to reject DeleteNotServingTablet RPC."); -DEFINE_test_flag(double, crash_after_creating_single_split_tablet, 0.0, - "Crash inside CatalogManager::RegisterNewTabletForSplit after calling Upsert."); +DEFINE_test_flag(double, crash_after_registering_split_tablets, 0.0, + "Crash inside CatalogManager::RegisterNewTabletsForSplit after calling Upsert."); -DEFINE_test_flag(bool, error_after_creating_single_split_tablet, false, - "Return an error inside CatalogManager::RegisterNewTabletForSplit " - "after calling Upsert."); +DEFINE_test_flag(bool, error_after_registering_split_tablets, false, + "Return an error inside CatalogManager::RegisterNewTabletsForSplit " + "after calling Upsert."); DEFINE_RUNTIME_bool(enable_delete_truncate_xcluster_replicated_table, false, "When set, enables deleting/truncating YCQL tables currently in xCluster replication. " @@ -497,9 +497,6 @@ DEFINE_RUNTIME_bool(batch_ysql_system_tables_metadata, true, "Whether change metadata operation and SysCatalogTable upserts for ysql system tables during a " "create database is performed one by one or batched together"); -DEFINE_test_flag(bool, pause_split_child_registration, - false, "Pause split after registering one child"); - DEFINE_test_flag(bool, keep_docdb_table_on_ysql_drop_table, false, "When enabled does not delete tables from the docdb layer, resulting in YSQL " "tables only being dropped in the postgres layer."); @@ -3056,15 +3053,14 @@ Status CatalogManager::DoSplitTablet( } // Add new split children to the sys catalog. - for (auto& new_tablet : new_tablets) { - RETURN_NOT_OK(RegisterNewTabletForSplit( - source_tablet_info.get(), new_tablet, epoch, &source_table_lock, &source_tablet_lock)); - } + RETURN_NOT_OK(RegisterNewTabletsForSplit( + source_tablet_info.get(), new_tablets, epoch, &source_table_lock, &source_tablet_lock)); + source_tablet_lock.Commit(); source_table_lock.Commit(); } - // At this point, the tablets exist in the table but not in the tablet map. Callers should retry - // if the tablets are not found in the tablet map. + // At this point, the tablets exist in the table but not in the tablet map. Users that need these + // tablets should retry until the tablets are inserted into the tablet map. MAYBE_FAULT(FLAGS_TEST_fault_crash_after_registering_split_children); @@ -7401,54 +7397,54 @@ Status CatalogManager::IsAlterTableDone(const IsAlterTableDoneRequestPB* req, return Status::OK(); } -Status CatalogManager::RegisterNewTabletForSplit( - TabletInfo* source_tablet_info, const TabletInfoPtr& new_tablet, const LeaderEpoch& epoch, - TableInfo::WriteLock* table_write_lock, TabletInfo::WriteLock* tablet_write_lock) { +Status CatalogManager::RegisterNewTabletsForSplit( + TabletInfo* source_tablet_info, const std::vector& new_tablets, + const LeaderEpoch& epoch, TableInfo::WriteLock* table_write_lock, + TabletInfo::WriteLock* parent_write_lock) { const auto tablet_lock = source_tablet_info->LockForRead(); auto table = source_tablet_info->table(); const auto& source_tablet_meta = tablet_lock->pb; - - auto& new_tablet_meta = new_tablet->mutable_metadata()->mutable_dirty()->pb; - new_tablet_meta.mutable_committed_consensus_state()->CopyFrom( - source_tablet_meta.committed_consensus_state()); const auto new_split_depth = source_tablet_meta.split_depth() + 1; - new_tablet_meta.set_split_depth(new_split_depth); - new_tablet_meta.set_split_parent_tablet_id(source_tablet_info->tablet_id()); - // TODO(tsplit): consider and handle failure scenarios, for example: - // - Crash or leader failover before sending out the split tasks. - // - Long enough partition while trying to send out the splits so that they timeout and - // not get executed. + for (auto& new_tablet : new_tablets) { + auto& new_tablet_meta = new_tablet->mutable_metadata()->mutable_dirty()->pb; + new_tablet_meta.mutable_committed_consensus_state()->CopyFrom( + source_tablet_meta.committed_consensus_state()); + new_tablet_meta.set_split_depth(new_split_depth); + new_tablet_meta.set_split_parent_tablet_id(source_tablet_info->tablet_id()); + + parent_write_lock->mutable_data()->pb.add_split_tablet_ids(new_tablet->id()); + } + int new_partition_list_version; auto& table_pb = table_write_lock->mutable_data()->pb; new_partition_list_version = table_pb.partition_list_version() + 1; table_pb.set_partition_list_version(new_partition_list_version); - tablet_write_lock->mutable_data()->pb.add_split_tablet_ids(new_tablet->id()); - RETURN_NOT_OK(sys_catalog_->Upsert(epoch, table, new_tablet, source_tablet_info)); + RETURN_NOT_OK(sys_catalog_->Upsert(epoch, table, new_tablets, source_tablet_info)); - TEST_PAUSE_IF_FLAG(TEST_pause_split_child_registration); - MAYBE_FAULT(FLAGS_TEST_crash_after_creating_single_split_tablet); - if (PREDICT_FALSE(FLAGS_TEST_error_after_creating_single_split_tablet)) { + MAYBE_FAULT(FLAGS_TEST_crash_after_registering_split_tablets); + if (PREDICT_FALSE(FLAGS_TEST_error_after_registering_split_tablets)) { return STATUS(IllegalState, "TEST: error happened while registering a new tablet."); } + // This has to be done while the table lock is held since TableInfo::partitions_ must be updated // at the same time as the partition list version. - RETURN_NOT_OK(table->AddTablet(new_tablet)); - - const PartitionPB& partition = new_tablet->metadata().state().pb.partition(); - LOG(INFO) << "Registered new tablet " << new_tablet->tablet_id() << " (partition_key_start: " - << Slice(partition.partition_key_start()).ToDebugString(/* max_length = */ 64) - << ", partition_key_end: " - << Slice(partition.partition_key_end()).ToDebugString(/* max_length = */ 64) - << ", split_depth: " << new_split_depth << ") to split the tablet " - << source_tablet_info->tablet_id() << " (" << AsString(source_tablet_meta.partition()) - << ") for table " << table->ToString() - << ", new partition_list_version: " << new_partition_list_version; - + RETURN_NOT_OK(table->AddTablets(new_tablets)); // TODO: We use this pattern in other places, but what if concurrent thread accesses not yet // committed TabletInfo from the `table` ? - new_tablet->mutable_metadata()->CommitMutation(); + for (auto& new_tablet : new_tablets) { + const PartitionPB& partition = new_tablet->metadata().state().pb.partition(); + LOG(INFO) << "Registered new tablet " << new_tablet->tablet_id() << " (partition_key_start: " + << Slice(partition.partition_key_start()).ToDebugString(/* max_length = */ 64) + << ", partition_key_end: " + << Slice(partition.partition_key_end()).ToDebugString(/* max_length = */ 64) + << ", split_depth: " << new_split_depth << ") to split the tablet " + << source_tablet_info->tablet_id() << " (" << AsString(source_tablet_meta.partition()) + << ") for table " << table->ToString() + << ", new partition_list_version: " << new_partition_list_version; + new_tablet->mutable_metadata()->CommitMutation(); + } return Status::OK(); } diff --git a/src/yb/master/catalog_manager.h b/src/yb/master/catalog_manager.h index dce20e507953..824b15ed0e93 100644 --- a/src/yb/master/catalog_manager.h +++ b/src/yb/master/catalog_manager.h @@ -2035,12 +2035,12 @@ class CatalogManager : public tserver::TabletPeerLookupIf, // partitions_vtable_cache_refresh_secs seconds. void RebuildYQLSystemPartitions(); - // Registers `new_tablet` for the same table as `source_tablet_info` tablet. + // Registers `new_tablets` for the same table as `source_tablet_info` tablet. // Does not change any other tablets and their partitions. - Status RegisterNewTabletForSplit( - TabletInfo* source_tablet_info, const TabletInfoPtr& new_tablet, const LeaderEpoch& epoch, - TableInfo::WriteLock* table_write_lock, TabletInfo::WriteLock* tablet_write_lock) - EXCLUDES(mutex_); + Status RegisterNewTabletsForSplit( + TabletInfo* source_tablet_info, const std::vector& new_tablets, + const LeaderEpoch& epoch, TableInfo::WriteLock* table_write_lock, + TabletInfo::WriteLock* tablet_write_lock) EXCLUDES(mutex_); Result GetTabletInfoUnlocked(const TabletId& tablet_id) REQUIRES_SHARED(mutex_); diff --git a/src/yb/tools/yb-admin-snapshot-schedule-test.cc b/src/yb/tools/yb-admin-snapshot-schedule-test.cc index cb6842a51c18..79d3fdedc956 100644 --- a/src/yb/tools/yb-admin-snapshot-schedule-test.cc +++ b/src/yb/tools/yb-admin-snapshot-schedule-test.cc @@ -4117,12 +4117,6 @@ TEST_F(YbAdminRestoreDuringSplit, RestoreBeforeGetSplitKey) { ASSERT_OK(RunTest(1 /* expected num tablets after restore */)); } -// Restore to a time after one of the child tablets is registered by the master. -TEST_F(YbAdminRestoreDuringSplit, RestoreAfterOneChildRegistered) { - SetDelayFlag("TEST_pause_split_child_registration", /* set on master */ true); - ASSERT_OK(RunTest(1 /* expected num tablets after restore */)); -} - // Restore to a time after both the child tablets are registered by the master but // before the SPLIT_OP is applied. TEST_F(YbAdminRestoreDuringSplit, RestoreBeforeSplitOpIsApplied) {