Skip to content

Commit

Permalink
Merge branch 'main' into yuchen/fix-lsn-lease-size-test
Browse files Browse the repository at this point in the history
  • Loading branch information
yliang412 authored Jul 8, 2024
2 parents 416d886 + df3dc6e commit cf0b3b7
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 68 deletions.
3 changes: 3 additions & 0 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,9 @@ impl From<CreateImageLayersError> for CompactionError {
fn from(e: CreateImageLayersError) -> Self {
match e {
CreateImageLayersError::Cancelled => CompactionError::ShuttingDown,
CreateImageLayersError::Other(e) => {
CompactionError::Other(e.context("create image layers"))
}
_ => CompactionError::Other(e.into()),
}
}
Expand Down
9 changes: 8 additions & 1 deletion storage_controller/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4062,7 +4062,14 @@ impl Service {
placement_policy: Some(PlacementPolicy::Attached(0)), // No secondaries, for convenient debug/hacking

// There is no way to know what the tenant's config was: revert to defaults
config: TenantConfig::default(),
//
// TODO: remove `switch_aux_file_policy` once we finish auxv2 migration
//
// we write to both v1+v2 storage, so that the test case can use either storage format for testing
config: TenantConfig {
switch_aux_file_policy: Some(models::AuxFilePolicy::CrossValidation),
..TenantConfig::default()
},
})
.await?;

Expand Down
182 changes: 115 additions & 67 deletions test_runner/regress/test_pg_regress.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

import pytest
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
check_restored_datadir_content,
tenant_get_shards,
)
from fixtures.pg_version import PgVersion
from fixtures.remote_storage import s3_storage
Expand All @@ -21,6 +24,97 @@
from pytest import CaptureFixture


TENANT_CONF = {
# Scaled down thresholds so that we are exercising the pageserver beyond just writing
# ephemeral/L0 layers, and because debug-mode code is slow to read from full sized ephemeral layer files.
"pitr_interval": "60s",
"checkpoint_distance": f"{8 * 1024 * 1024}",
"compaction_target_size": f"{8 * 1024 * 1024}",
}

# # Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
# # There should have been compactions mid-test as well, this final check is in addition those.
# for (shard, pageserver) in tenant_get_shards(env, env.initial_tenant):
# pageserver.http_client().timeline_checkpoint(env.initial_tenant, env.initial_timeline, force_repartition=True, force_image_layer_creation=True)


def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: Endpoint):
"""
After running some opaque tests that create interesting content in a timeline, run
some generic integrity checks that the storage stack is able to reproduce the written
data properly.
"""

ignored_files: Optional[list[str]] = None

# Neon handles unlogged relations in a special manner. During a
# basebackup, we ship the init fork as the main fork. This presents a
# problem in that the endpoint's data directory and the basebackup will
# have differences and will fail the eventual file comparison.
#
# Unlogged tables were introduced in version 9.1. ALTER TABLE grew
# support for setting the persistence of a table in 9.5. The reason that
# this doesn't affect versions < 15 (but probably would between 9.1 and
# 9.5) is that all the regression tests that deal with unlogged tables
# up until that point dropped the unlogged tables or set them to logged
# at some point during the test.
#
# In version 15, Postgres grew support for unlogged sequences, and with
# that came a few more regression tests. These tests did not all drop
# the unlogged tables/sequences prior to finishing.
#
# But unlogged sequences came with a bug in that, sequences didn't
# inherit the persistence of their "parent" tables if they had one. This
# was fixed and backported to 15, thus exacerbating our problem a bit.
#
# So what we can do is just ignore file differences between the data
# directory and basebackup for unlogged relations.
results = cast(
"list[tuple[str, str]]",
endpoint.safe_psql(
"""
SELECT
relkind,
pg_relation_filepath(
pg_filenode_relation(reltablespace, relfilenode)
) AS unlogged_relation_paths
FROM pg_class
WHERE relpersistence = 'u'
""",
dbname=db_name,
),
)

unlogged_relation_files: list[str] = []
for r in results:
unlogged_relation_files.append(r[1])
# This is related to the following Postgres commit:
#
# commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
# Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
# Date: 2023-08-23 09:21:31 -0500
#
# Use the buffer cache when initializing an unlogged index.
#
# This patch was backpatched to 16. Without it, the LSN in the
# page header would be 0/0 in the data directory, which wouldn't
# match the LSN generated during the basebackup, thus creating
# a difference.
if env.pg_version <= PgVersion.V15 and r[0] == "i":
unlogged_relation_files.append(f"{r[1]}_init")

ignored_files = unlogged_relation_files

check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)

# Ensure that compaction works, on a timeline containing all the diversity that postgres regression tests create.
# There should have been compactions mid-test as well, this final check is in addition those.
for shard, pageserver in tenant_get_shards(env, env.initial_tenant):
pageserver.http_client().timeline_checkpoint(
shard, env.initial_timeline, force_repartition=True, force_image_layer_creation=True
)


# Run the main PostgreSQL regression tests, in src/test/regress.
#
@pytest.mark.timeout(600)
Expand All @@ -45,7 +139,10 @@ def test_pg_regress(

neon_env_builder.enable_pageserver_remote_storage(s3_storage())
neon_env_builder.enable_scrub_on_exit()
env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF,
initial_tenant_shard_count=shard_count,
)

# Connect to postgres and create a database called "regression".
endpoint = env.endpoints.create_start("main")
Expand Down Expand Up @@ -84,67 +181,7 @@ def test_pg_regress(
with capsys.disabled():
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

ignored_files: Optional[list[str]] = None

# Neon handles unlogged relations in a special manner. During a
# basebackup, we ship the init fork as the main fork. This presents a
# problem in that the endpoint's data directory and the basebackup will
# have differences and will fail the eventual file comparison.
#
# Unlogged tables were introduced in version 9.1. ALTER TABLE grew
# support for setting the persistence of a table in 9.5. The reason that
# this doesn't affect versions < 15 (but probably would between 9.1 and
# 9.5) is that all the regression tests that deal with unlogged tables
# up until that point dropped the unlogged tables or set them to logged
# at some point during the test.
#
# In version 15, Postgres grew support for unlogged sequences, and with
# that came a few more regression tests. These tests did not all drop
# the unlogged tables/sequences prior to finishing.
#
# But unlogged sequences came with a bug in that, sequences didn't
# inherit the persistence of their "parent" tables if they had one. This
# was fixed and backported to 15, thus exacerbating our problem a bit.
#
# So what we can do is just ignore file differences between the data
# directory and basebackup for unlogged relations.
results = cast(
"list[tuple[str, str]]",
endpoint.safe_psql(
"""
SELECT
relkind,
pg_relation_filepath(
pg_filenode_relation(reltablespace, relfilenode)
) AS unlogged_relation_paths
FROM pg_class
WHERE relpersistence = 'u'
""",
dbname=DBNAME,
),
)

unlogged_relation_files: list[str] = []
for r in results:
unlogged_relation_files.append(r[1])
# This is related to the following Postgres commit:
#
# commit ccadf73163ca88bdaa74b8223d4dde05d17f550b
# Author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
# Date: 2023-08-23 09:21:31 -0500
#
# Use the buffer cache when initializing an unlogged index.
#
# This patch was backpatched to 16. Without it, the LSN in the
# page header would be 0/0 in the data directory, which wouldn't
# match the LSN generated during the basebackup, thus creating
# a difference.
if env.pg_version <= PgVersion.V15 and r[0] == "i":
unlogged_relation_files.append(f"{r[1]}_init")

ignored_files = unlogged_relation_files

check_restored_datadir_content(test_output_dir, env, endpoint, ignored_files=ignored_files)
post_checks(env, test_output_dir, DBNAME, endpoint)


# Run the PostgreSQL "isolation" tests, in src/test/isolation.
Expand All @@ -159,16 +196,20 @@ def test_isolation(
pg_distrib_dir: Path,
shard_count: Optional[int],
):
DBNAME = "isolation_regression"

if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
neon_env_builder.enable_scrub_on_exit()
env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
)

# Connect to postgres and create a database called "regression".
# isolation tests use prepared transactions, so enable them
endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=100"])
endpoint.safe_psql("CREATE DATABASE isolation_regression")
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")

# Create some local directories for pg_isolation_regress to run in.
runpath = test_output_dir / "regress"
Expand Down Expand Up @@ -202,6 +243,9 @@ def test_isolation(
with capsys.disabled():
pg_bin.run(pg_isolation_regress_command, env=env_vars, cwd=runpath)

# This fails with a mismatch on `pg_multixact/offsets/0000`
# post_checks(env, test_output_dir, DBNAME, endpoint)


# Run extra Neon-specific pg_regress-based tests. The tests and their
# schedule file are in the sql_regress/ directory.
Expand All @@ -215,15 +259,19 @@ def test_sql_regress(
pg_distrib_dir: Path,
shard_count: Optional[int],
):
DBNAME = "regression"

if shard_count is not None:
neon_env_builder.num_pageservers = shard_count
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
neon_env_builder.enable_scrub_on_exit()
env = neon_env_builder.init_start(initial_tenant_shard_count=shard_count)
env = neon_env_builder.init_start(
initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count
)

# Connect to postgres and create a database called "regression".
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("CREATE DATABASE regression")
endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")

# Create some local directories for pg_regress to run in.
runpath = test_output_dir / "regress"
Expand Down Expand Up @@ -258,4 +306,4 @@ def test_sql_regress(
with capsys.disabled():
pg_bin.run(pg_regress_command, env=env_vars, cwd=runpath)

check_restored_datadir_content(test_output_dir, env, endpoint)
post_checks(env, test_output_dir, DBNAME, endpoint)

0 comments on commit cf0b3b7

Please sign in to comment.