From 5c0d6728317f88aeb332ead19595494e4aa017d7 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 2 Jul 2024 21:45:42 +0300 Subject: [PATCH] Add test for proper handling of connection failure to avoid 'cannot wait on socket event without a socket' error (#8231) ## Problem See https://github.com/neondatabase/cloud/issues/14289 and PR #8210 ## Summary of changes Add test for problems fixed in #8210 ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/libpagestore.c | 5 ---- .../regress/test_pageserver_reconnect.py | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index a3fdcc537ead8..73a001b6ba72a 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -427,11 +427,6 @@ pageserver_connect(shardno_t shard_no, int elevel) values[n_pgsql_params] = NULL; shard->conn = PQconnectStartParams(keywords, values, 1); - if (!shard->conn) - { - neon_shard_log(shard_no, elevel, "Failed to connect to pageserver: out of memory"); - return false; - } if (PQstatus(shard->conn) == CONNECTION_BAD) { char *msg = pchomp(PQerrorMessage(shard->conn)); diff --git a/test_runner/regress/test_pageserver_reconnect.py b/test_runner/regress/test_pageserver_reconnect.py index aecfcdd262e5a..37ff923632d23 100644 --- a/test_runner/regress/test_pageserver_reconnect.py +++ b/test_runner/regress/test_pageserver_reconnect.py @@ -2,6 +2,7 @@ import time from contextlib import closing +import psycopg2.errors from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, PgBin @@ -40,3 +41,26 @@ def run_pgbench(connstr: str): c.execute("select pg_reload_conf()") thread.join() + + +# Test handling errors during page server reconnect +def test_pageserver_reconnect_failure(neon_simple_env: NeonEnv): + env = neon_simple_env + env.neon_cli.create_branch("test_pageserver_reconnect") + endpoint = env.endpoints.create_start("test_pageserver_reconnect") + + con = endpoint.connect() + cur = con.cursor() + + cur.execute("set statement_timeout='2s'") + cur.execute("SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'") + connstring = cur.fetchall()[0][0] + cur.execute( + f"alter system set neon.pageserver_connstring='{connstring}?some_invalid_param=xyz'" + ) + cur.execute("select pg_reload_conf()") + try: + cur.execute("select count(*) from pg_class") + except psycopg2.errors.QueryCanceled: + log.info("Connection to PS failed") + assert not endpoint.log_contains("ERROR: cannot wait on socket event without a socket.*")