Skip to content

Commit

Permalink
[#24396] YSQL: Harden Postgres shutdown and restart behavior
Browse files Browse the repository at this point in the history
Summary:
The yb-tserver starts Postgres process and monitors it. If Postgres crashes then the yb-tserver restarts it.
Postgres writes two lock files `postmaster.pid`,and `<socket>.lock` with its `pid` information. On a Postgres restart it checks to see if these files exist and if the processes indicated by them are still running and fails its startup to avoid running concurrent instances of Postgres.
In linux `pid` (process id) and `tid` (thread id) share the same numbering space, so if a tserver thread picks up the number then it causes Postgres to enter a incorrect crash loop.

This change includes the following changes to yb-tserver that ensure we cleanly handle hung Postgres process, and stale lock files:
- Handle both the `postmaster.pid` and `<socket>.lock` files.
- Only kill the pid if it is a Postgres process.
- Detect stuck postgres processes in mac using `proc_pidinfo`.
- Kill hung Postgres and cleanup lock files before each Postgres startup instead of doing it only once at tserver startup.
- Use `SIGQUIT` as `YB_PG_PDEATHSIG` instead of `SIGINT`. This causes Postmaster to enter "Immediate Shutdown" where it kills other backend with `SIGQUIT` reducing the chances of a hung Postgres. Also setting `report_thread_leaks=0` for TSAN to suppress thread leak errors. Postgres invokes `_exit` which will terminate the entire group, and not leak any threads.
- Use `quickdie` in pg_cron background worker to keep it inline with the pg_cron launcher worker.
Jira: DB-13307

Test Plan: PgWrapperOneNodeClusterTest, TestPostgresLockFiles

Reviewers: slingam, kramanathan, smishra, telgersma

Reviewed By: kramanathan, telgersma

Subscribers: yql, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D39207
  • Loading branch information
hari90 committed Oct 23, 2024
1 parent 2ff8bbd commit 111b65d
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 112 deletions.
2 changes: 1 addition & 1 deletion src/postgres/third-party-extensions/pg_cron/src/pg_cron.c
Original file line number Diff line number Diff line change
Expand Up @@ -2157,7 +2157,7 @@ CronBackgroundWorker(Datum main_arg)
shm_mq_handle *responseq;

/* handle SIGTERM like regular backend */
pqsignal(SIGTERM, die);
pqsignal(SIGTERM, quickdie);
/* YB Note: Exit immediately. */
pqsignal(SIGQUIT, quickdie);
BackgroundWorkerUnblockSignals();
Expand Down
6 changes: 6 additions & 0 deletions src/yb/util/pg_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include "yb/util/format.h"
#include "yb/util/hash_util.h"
#include "yb/util/path_util.h"

DECLARE_string(tmp_dir);

Expand Down Expand Up @@ -105,4 +106,9 @@ std::string PgDeriveSocketDir(const HostPort& host_port) {
return path;
}

std::string PgDeriveSocketLockFile(const HostPort& host_port) {
return JoinPathSegments(
PgDeriveSocketDir(host_port), Format(".s.PGSQL.$0.lock", host_port.port()));
}

} // namespace yb
2 changes: 2 additions & 0 deletions src/yb/util/pg_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ namespace yb {

std::string PgDeriveSocketDir(const HostPort& host_port);

std::string PgDeriveSocketLockFile(const HostPort& host_port);

} // namespace yb
136 changes: 77 additions & 59 deletions src/yb/yql/pgwrapper/pg_wrapper-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#include "yb/util/backoff_waiter.h"
#include "yb/util/flags.h"
#include "yb/util/logging.h"
#include "yb/util/logging_test_util.h"
#include "yb/util/path_util.h"
#include "yb/util/pg_util.h"
#include "yb/util/result.h"
#include "yb/util/to_stream.h"
#include "yb/util/tsan_util.h"
Expand Down Expand Up @@ -334,71 +336,87 @@ class PgWrapperOneNodeClusterTest : public PgWrapperTest {
}
};

TEST_F(PgWrapperOneNodeClusterTest, TestPostgresPid) {
TEST_F(PgWrapperOneNodeClusterTest, TestPostgresLockFiles) {
MonoDelta timeout = 15s;
int tserver_count = 1;

std::string pid_file = JoinPathSegments(pg_ts->GetRootDir(), "pg_data", "postmaster.pid");
// Wait for postgres server to start and setup postmaster.pid file
ASSERT_OK(LoggedWaitFor(
[this, &pid_file] {
return env_->FileExists(pid_file);
}, timeout, "Waiting for postgres server to create postmaster.pid file"));
ASSERT_TRUE(env_->FileExists(pid_file));
const auto pid_file = JoinPathSegments(pg_ts->GetRootDir(), "pg_data", "postmaster.pid");

// Shutdown tserver and wait for postgres server to shut down and delete postmaster.pid file
pg_ts->Shutdown();
ASSERT_OK(LoggedWaitFor(
[this, &pid_file] {
return !env_->FileExists(pid_file);
}, timeout, "Waiting for postgres server to shutdown"));
ASSERT_FALSE(env_->FileExists(pid_file));

// Create empty postmaster.pid file and ensure that tserver can start up
// Use sync_on_close flag to ensure that the file is flushed to disk when tserver tries to read it
std::unique_ptr<RWFile> file;
RWFileOptions opts;
opts.sync_on_close = true;
opts.mode = Env::CREATE_IF_NON_EXISTING_TRUNCATE;

ASSERT_OK(env_->NewRWFile(opts, pid_file, &file));
ASSERT_OK(pg_ts->Start(false /* start_cql_proxy */));
ASSERT_OK(cluster_->WaitForTabletServerCount(tserver_count, timeout));
ASSERT_OK(WaitForPostgresToStart(timeout));
std::vector<std::string> lock_files;
lock_files.push_back(pid_file);
lock_files.emplace_back(
PgDeriveSocketLockFile(HostPort(pg_ts->bind_host(), pg_ts->pgsql_rpc_port())));

// Shutdown tserver and wait for postgres server to shutdown and delete postmaster.pid file
pg_ts->Shutdown();
ASSERT_OK(LoggedWaitFor(
[this, &pid_file] {
return !env_->FileExists(pid_file);
}, timeout, "Waiting for postgres server to shutdown", 100ms));
ASSERT_FALSE(env_->FileExists(pid_file));

// Create postmaster.pid file with string pid (invalid) and ensure that tserver can start up
ASSERT_OK(env_->NewRWFile(opts, pid_file, &file));
ASSERT_OK(file->Write(0, "abcde\n" + pid_file));
ASSERT_OK(file->Close());

ASSERT_OK(pg_ts->Start(false /* start_cql_proxy */));
ASSERT_OK(cluster_->WaitForTabletServerCount(tserver_count, timeout));
ASSERT_OK(WaitForPostgresToStart(timeout));
auto validate_files = [&lock_files, env = Env::Default()](bool exists) -> Status {
for (const auto& lock_file : lock_files) {
SCHECK_EQ(
exists, env->FileExists(lock_file), IllegalState,
Format("$0 file $1found", lock_file, (exists ? " not " : "")));
}
return Status::OK();
};

auto write_lock_files = [this, &lock_files](const std::string& data) -> Status {
// Use sync_on_close flag to ensure that the file is flushed to disk when tserver tries to read
// it.
RWFileOptions opts;
opts.sync_on_close = true;
opts.mode = Env::CREATE_IF_NON_EXISTING_TRUNCATE;

LOG(INFO) << "Writing lock files with data: \n'" << data << "'";

for (const auto& lock_file : lock_files) {
std::unique_ptr<RWFile> file;
RETURN_NOT_OK(env_->NewRWFile(opts, lock_file, &file));
if (!data.empty()) {
RETURN_NOT_OK(file->Write(0, data));
}
RETURN_NOT_OK(file->Close());
}
return Status::OK();
};

// Shutdown tserver and wait for postgres server to shutdown and delete postmaster.pid file
pg_ts->Shutdown();
ASSERT_OK(LoggedWaitFor(
[this, &pid_file] {
return !env_->FileExists(pid_file);
}, timeout, "Waiting for postgres server to shutdown", 100ms));
ASSERT_FALSE(env_->FileExists(pid_file));

// Create postgres pid file with integer pid (valid) and ensure that tserver can start up
ASSERT_OK(env_->NewRWFile(opts, pid_file, &file));
ASSERT_OK(file->Write(0, "1002\n" + pid_file));
ASSERT_OK(file->Close());

ASSERT_OK(pg_ts->Start(false /* start_cql_proxy */));
ASSERT_OK(cluster_->WaitForTabletServerCount(tserver_count, timeout));
auto restart_tserver_and_validate = [&](const std::string& lock_file_data) -> Status {
pg_ts->Shutdown();
RETURN_NOT_OK(WaitFor(
[&]() -> Result<bool> { return validate_files(/*exists=*/false).ok(); }, timeout,
"Lock files to clear"));

RETURN_NOT_OK(write_lock_files(lock_file_data));

RETURN_NOT_OK(pg_ts->Start(false /* start_cql_proxy */));
RETURN_NOT_OK(cluster_->WaitForTabletServerCount(GetNumTabletServers(), timeout));
RETURN_NOT_OK(WaitForPostgresToStart(timeout));
RETURN_NOT_OK(validate_files(/*exists=*/true));

return Status::OK();
};

// Wait for postgres server to start and setup lock files.
ASSERT_OK(WaitForPostgresToStart(timeout));
ASSERT_OK(validate_files(/*exists=*/true));

// Test empty lock files.
ASSERT_OK(restart_tserver_and_validate(""));

// Test lock files with string pid (invalid).
ASSERT_OK(restart_tserver_and_validate("abcde\n" + pid_file));

const auto killMsg = "Killing older postgres process";

// Test lock files with a random integer pid (valid).
{
auto kill_mgs_watcher = StringWaiterLogSink(killMsg);
ASSERT_OK(restart_tserver_and_validate("1002\n" + pid_file));
ASSERT_FALSE(kill_mgs_watcher.IsEventOccurred());
}

// Test lock files with integer pid (our own pid) and ensure that tserver does not kill us.
{
const auto my_pid = getpid();
auto kill_mgs_watcher = StringWaiterLogSink(killMsg);
ASSERT_OK(restart_tserver_and_validate(Format("$0\n$1", my_pid, pid_file)));
ASSERT_FALSE(kill_mgs_watcher.IsEventOccurred());
}
}

class PgWrapperSingleNodeLongTxnTest : public PgWrapperOneNodeClusterTest {
Expand Down
Loading

0 comments on commit 111b65d

Please sign in to comment.