Skip to content

Commit 4d476f4

Browse files
committed
[yugabyte#9797] DocDB: Avoid catalog version going backward during online upgrade
Summary:
Before and during a major YSQL version upgrade, the master uses the heartbeat mechanism to provide current catalog versions to tservers from the PG11 catalog versions table. Note that DDLs are not allowed during a YSQL major version upgrade, and the PG15 catalogs are populated as semantic equivalents of the PG11 catalogs. To the PG11-only clients (tservers), there is only one valid catalog version state: the state at the time the upgrade began.

While going through the upgrade flow, the C++ upgrade tests keep the cluster running while the YB master is switched out of upgrade mode. When the master is switched out of upgrade mode in this way, it starts providing the current catalog versions from the PG15 catalog versions table, which is freshly created and has every entry at version 1. As a result, tservers see the version number go backward and intermittently crash with the following error message:

    Ignoring ysql db 13245 catalog version update: new version too old. New: 1, Old: 2, ignored count: 31

Fix this incorrect state by copying the contents of the PG11 catalog versions table to PG15, right after the PG15 catalog is established and before reporting success of the catalog upgrade.

Test Plan:
Jenkins
On MacOS arm64: ./yb_build.sh release --cxx-test pg15_upgrade-test --gtest_filter=Pg15UpgradeTest.Schemas

Reviewers: hsunder
Subscribers: ybase, yql
Differential Revision: https://phorge.dev.yugabyte.com/D38961
1 parent 88a995f commit 4d476f4

File tree

4 files changed

+59
-10
lines changed

4 files changed

+59
-10
lines changed

pg15_tests/common_upgrade.sh

+16-6
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,26 @@ common_pg15_flags="TEST_always_return_consensus_info_for_succeeded_rpc=false,pg_
1313
# implemented.
1414
common_tserver_flags='"ysql_pg_conf_csv=yb_enable_expression_pushdown=false"'
1515

16+
pg11_enable_db_catalog_flag="allowed_preview_flags_csv=ysql_enable_db_catalog_version_mode,ysql_enable_db_catalog_version_mode=true"
17+
1618
# Downloads, runs, and pushds the directory for pg11.
1719
# Sets $pg11path to the pg11 directory.
1820
run_and_pushd_pg11() {
1921
prefix="/tmp"
20-
ybversion_pg11="2024.2.0.0"
21-
ybbuild="957e0756c1234079d7bda1cee74d468e7157f11e"
22+
ybversion_pg11="2.20.2.2"
23+
ybbuild="b1"
2224
if [[ $OSTYPE = linux* ]]; then
23-
arch="release-clang17-centos-x86_64"
25+
arch="linux-x86_64"
2426
tarbin="tar"
2527
fi
2628
if [[ $OSTYPE = darwin* ]]; then
27-
arch="release-clang-darwin-arm64"
29+
arch="darwin-x86_64"
2830
tarbin="gtar"
2931
fi
3032
ybfilename_pg11="yugabyte-$ybversion_pg11-$ybbuild-$arch.tar.gz"
3133

3234
if [ ! -f "$prefix"/"$ybfilename_pg11" ]; then
33-
curl "https://s3.us-west-2.amazonaws.com/uploads.dev.yugabyte.com/local-provider-test/$ybversion_pg11/$ybfilename_pg11" \
35+
curl "https://downloads.yugabyte.com/releases/$ybversion_pg11/$ybfilename_pg11" \
3436
-o "$prefix"/"$ybfilename_pg11"
3537
fi
3638

@@ -42,7 +44,15 @@ run_and_pushd_pg11() {
4244

4345
pg11path="$prefix/yugabyte-$ybversion_pg11"
4446
pushd "$pg11path"
45-
yb_ctl_destroy_create --rf=3 --tserver_flags="$common_tserver_flags"
47+
yb_ctl_destroy_create --rf=3
48+
ysqlsh <<EOT
49+
SET yb_non_ddl_txn_for_sys_tables_allowed=true;
50+
SELECT yb_fix_catalog_version_table(true);
51+
SET yb_non_ddl_txn_for_sys_tables_allowed = false;
52+
EOT
53+
yb_ctl restart \
54+
--tserver_flags="$common_tserver_flags,$pg11_enable_db_catalog_flag" \
55+
--master_flags="$pg11_enable_db_catalog_flag"
4656
}
4757

4858
upgrade_masters() {

pg15_tests/test_upgrade_rollback.sh

+9-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,15 @@ build/latest/bin/yb-admin --init_master_addrs=127.0.0.200:7100 list_tables inclu
2525
# Roll back
2626
# Restart node 2 tserver as PG11
2727
pushd $pg11path
28-
yb_ctl restart_node 2 --tserver_flags="$common_tserver_flags"
28+
# YB_TODO: Since D31087 isn't in the PG11 "from" build, we need to undo PG15's pg_data symlink
29+
# and restore the pg_data_11 directory to pg_data.
30+
# When D31087 is in the PG11 build, replace the lines from stop_node to start_node with:
31+
# yb_ctl restart_node 2
32+
# D31087 is present in 2.21 onwards, 2024.1 onwards.
33+
yb_ctl stop_node 2
34+
rm "$data_dir/node-2/disk-1/pg_data"
35+
mv "$data_dir/node-2/disk-1/pg_data_11" "$data_dir/node-2/disk-1/pg_data"
36+
yb_ctl start_node 2 --tserver_flags="$pg11_enable_db_catalog_flag" --master_flags="$pg11_enable_db_catalog_flag"
2937
popd
3038
# Issue the rollback RPC
3139
echo rollback starting at $(date +"%r")

src/yb/integration-tests/upgrade-tests/pg15_upgrade_test_base.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace yb {
1919

2020
class Pg15UpgradeTestBase : public UpgradeTestBase {
2121
public:
22-
Pg15UpgradeTestBase() : UpgradeTestBase(kBuild_2024_2_0_0) {}
22+
Pg15UpgradeTestBase() : UpgradeTestBase(kBuild_2_20_2_4) {}
2323
virtual ~Pg15UpgradeTestBase() override = default;
2424

2525
void SetUp() override;

src/yb/integration-tests/upgrade-tests/upgrade_test_base.cc

+33-2
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ void AddUnDefOkAndSetFlag(
201201
flag_list.emplace_back(Format("--$0=$1", flag_name, flag_value));
202202
}
203203

204+
void AddAllowedPreviewFlag(std::vector<std::string>& flag_list, const std::string& flag_name) {
205+
AddFlagToCsvFlag(flag_list, "allowed_preview_flags_csv", flag_name);
206+
}
207+
204208
void WaitForAutoFlagApply() { SleepFor(FLAGS_auto_flags_apply_delay_ms * 1ms + 3s); }
205209

206210
Status SetYsqlMajorUpgradeFlagOnMasters(ExternalMiniCluster& cluster, bool enable) {
@@ -220,9 +224,9 @@ UpgradeTestBase::UpgradeTestBase(const std::string& from_version)
220224
}
221225

222226
void UpgradeTestBase::SetUp() {
223-
if (old_version_info_.version != kBuild_2024_2_0_0) {
227+
if (old_version_info_.version != kBuild_2_20_2_4) {
224228
test_skipped_ = true;
225-
GTEST_SKIP() << "PG15 upgrade is only supported from version " << kBuild_2024_2_0_0;
229+
GTEST_SKIP() << "PG15 upgrade is only supported from version " << kBuild_2_20_2_4;
226230
}
227231

228232
if (IsSanitizer()) {
@@ -268,6 +272,15 @@ Status UpgradeTestBase::StartClusterInOldVersion(const ExternalMiniClusterOption
268272
AddUnDefOkAndSetFlag(
269273
opts.extra_tserver_flags, "TEST_always_return_consensus_info_for_succeeded_rpc", "false");
270274

275+
// YB_TODO: Enable ysql_enable_db_catalog_version_mode since it is not major version upgrade
276+
// safe.
277+
if (old_version_info_.version == kBuild_2_20_2_4) {
278+
AddAllowedPreviewFlag(opts.extra_master_flags, "ysql_enable_db_catalog_version_mode");
279+
AddUnDefOkAndSetFlag(opts.extra_master_flags, "ysql_enable_db_catalog_version_mode", "true");
280+
AddAllowedPreviewFlag(opts.extra_tserver_flags, "ysql_enable_db_catalog_version_mode");
281+
AddUnDefOkAndSetFlag(opts.extra_tserver_flags, "ysql_enable_db_catalog_version_mode", "true");
282+
}
283+
271284
LOG(INFO) << "Starting cluster in version: " << old_version_info_.version;
272285

273286
RETURN_NOT_OK(ExternalMiniClusterITestBase::StartCluster(opts));
@@ -296,6 +309,14 @@ Status UpgradeTestBase::StartClusterInOldVersion(const ExternalMiniClusterOption
296309
RETURN_NOT_OK(cluster_->AddAndSetExtraFlag("ysql_yb_enable_expression_pushdown", "false"));
297310
}
298311

312+
if (old_version_info_.version == kBuild_2_20_2_4) {
313+
// YB_TODO: Remove when the min upgrade-from version is 2024.1+.
314+
auto conn = VERIFY_RESULT(cluster_->ConnectToDB());
315+
RETURN_NOT_OK(conn.Execute("SET yb_non_ddl_txn_for_sys_tables_allowed=true"));
316+
RETURN_NOT_OK(conn.Fetch("SELECT yb_fix_catalog_version_table(true)"));
317+
RETURN_NOT_OK(conn.Execute("SET yb_non_ddl_txn_for_sys_tables_allowed=false"));
318+
}
319+
299320
return Status::OK();
300321
}
301322

@@ -616,6 +637,16 @@ Status UpgradeTestBase::RestartTServerInOldVersion(
616637
ExternalTabletServer& ts, bool wait_for_cluster_to_stabilize) {
617638
LOG(INFO) << "Restarting yb-tserver " << ts.id() << " in old version";
618639

640+
if (is_ysql_major_version_upgrade_ && old_version_info_.version == kBuild_2_20_2_4) {
641+
// YB_TODO: Remove this once we switch to a newer version.
642+
// 2.20 does not have the pg_data symlink changes so we need to manually delete the pg_data
643+
// folder and use the pg_data_11.
644+
auto env = Env::Default();
645+
const auto pg_data_dir = JoinPathSegments(ts.GetRootDir(), "pg_data");
646+
RETURN_NOT_OK(env->DeleteRecursively(pg_data_dir));
647+
RETURN_NOT_OK(env->RenameFile(JoinPathSegments(ts.GetRootDir(), "pg_data_11"), pg_data_dir));
648+
}
649+
619650
RETURN_NOT_OK(RestartDaemonInVersion(ts, old_version_tserver_bin_path_));
620651

621652
if (wait_for_cluster_to_stabilize) {

0 commit comments

Comments
 (0)