Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pageserver: add metrics for unknown ClearVmBits pages #9911

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2144,6 +2144,7 @@ pub(crate) struct WalIngestMetrics {
pub(crate) records_committed: IntCounter,
pub(crate) records_filtered: IntCounter,
pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
pub(crate) clear_vm_bits_unknown: IntCounterVec,
}

pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
Expand Down Expand Up @@ -2172,6 +2173,12 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
"Total number of zero gap blocks written on relation extends"
)
.expect("failed to define a metric"),
clear_vm_bits_unknown: register_int_counter_vec!(
"pageserver_wal_ingest_clear_vm_bits_unknown",
"Number of ignored ClearVmBits operations due to unknown pages/relations",
&["entity"],
)
.expect("failed to define a metric"),
});

pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
Expand Down
17 changes: 14 additions & 3 deletions pageserver/src/pgdatadir_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,9 @@ impl Timeline {
result
}

// Get size of a database in blocks
/// Get size of a database in blocks. This is only accurate on shard 0. It will undercount on
/// other shards, by only accounting for relations the shard has pages for, and only accounting
/// for pages up to the highest page number it has stored.
pub(crate) async fn get_db_size(
&self,
spcnode: Oid,
Expand All @@ -411,7 +413,10 @@ impl Timeline {
Ok(total_blocks)
}

/// Get size of a relation file
/// Get size of a relation file. The relation must exist, otherwise an error is returned.
///
/// This is only accurate on shard 0. On other shards, it will return the size up to the highest
/// page number stored in the shard.
pub(crate) async fn get_rel_size(
&self,
tag: RelTag,
Expand Down Expand Up @@ -447,7 +452,10 @@ impl Timeline {
Ok(nblocks)
}

/// Does relation exist?
/// Does the relation exist?
///
/// Only shard 0 has a full view of the relations. Other shards only know about relations that
/// the shard stores pages for.
pub(crate) async fn get_rel_exists(
&self,
tag: RelTag,
Expand Down Expand Up @@ -481,6 +489,9 @@ impl Timeline {

/// Get a list of all existing relations in given tablespace and database.
///
/// Only shard 0 has a full view of the relations. Other shards only know about relations that
/// the shard stores pages for.
///
/// # Cancel-Safety
///
/// This method is cancellation-safe.
Expand Down
49 changes: 36 additions & 13 deletions pageserver/src/walingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,14 +334,32 @@ impl WalIngest {
// replaying it would fail to find the previous image of the page, because
// it doesn't exist. So check if the VM page(s) exist, and skip the WAL
// record if they don't.
let vm_size = get_relsize(modification, vm_rel, ctx).await?;
//
// TODO: analyze the metrics and tighten this up accordingly. This logic
// implicitly assumes that VM pages see explicit WAL writes before
// implicit ClearVmBits, and will otherwise silently drop updates.
let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? else {
WAL_INGEST
.clear_vm_bits_unknown
.with_label_values(&["relation"])
.inc();
return Ok(());
};
if let Some(blknum) = new_vm_blk {
if blknum >= vm_size {
WAL_INGEST
.clear_vm_bits_unknown
.with_label_values(&["new_page"])
.inc();
new_vm_blk = None;
}
}
if let Some(blknum) = old_vm_blk {
if blknum >= vm_size {
WAL_INGEST
.clear_vm_bits_unknown
.with_label_values(&["old_page"])
.inc();
old_vm_blk = None;
}
}
Expand Down Expand Up @@ -572,7 +590,8 @@ impl WalIngest {
modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?;
fsm_physical_page_no += 1;
}
let nblocks = get_relsize(modification, rel, ctx).await?;
// TODO: re-examine the None case here wrt. sharding; should we error?
let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);
if nblocks > fsm_physical_page_no {
// check if something to do: FSM is larger than truncate position
self.put_rel_truncation(modification, rel, fsm_physical_page_no, ctx)
Expand Down Expand Up @@ -612,7 +631,8 @@ impl WalIngest {
)?;
vm_page_no += 1;
}
let nblocks = get_relsize(modification, rel, ctx).await?;
// TODO: re-examine the None case here wrt. sharding; should we error?
let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0);
if nblocks > vm_page_no {
// check if something to do: VM is larger than truncate position
self.put_rel_truncation(modification, rel, vm_page_no, ctx)
Expand Down Expand Up @@ -1430,24 +1450,27 @@ impl WalIngest {
}
}

/// Returns the size of the relation as of this modification, or None if the relation doesn't exist.
///
/// This is only accurate on shard 0. On other shards, it will return the size up to the highest
/// page number stored in the shard, or None if the shard does not have any pages for it.
async fn get_relsize(
modification: &DatadirModification<'_>,
rel: RelTag,
ctx: &RequestContext,
) -> Result<BlockNumber, PageReconstructError> {
let nblocks = if !modification
) -> Result<Option<BlockNumber>, PageReconstructError> {
if !modification
.tline
.get_rel_exists(rel, Version::Modified(modification), ctx)
.await?
{
0
} else {
modification
.tline
.get_rel_size(rel, Version::Modified(modification), ctx)
.await?
};
Ok(nblocks)
return Ok(None);
}
modification
.tline
.get_rel_size(rel, Version::Modified(modification), ctx)
.await
.map(Some)
}

#[allow(clippy::bool_assert_comparison)]
Expand Down
Loading