From f94ee7a7a5f69c3bcfb8122b456a74ff9d15f880 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 27 Nov 2024 13:57:14 +0100 Subject: [PATCH] pageserver: add metrics for unknown `ClearVmBits` pages --- pageserver/src/metrics.rs | 7 +++++ pageserver/src/pgdatadir_mapping.rs | 17 ++++++++-- pageserver/src/walingest.rs | 49 +++++++++++++++++++++-------- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 78a157f51e9f..86be97587fef 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -2144,6 +2144,7 @@ pub(crate) struct WalIngestMetrics { pub(crate) records_committed: IntCounter, pub(crate) records_filtered: IntCounter, pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter, + pub(crate) clear_vm_bits_unknown: IntCounterVec, } pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics { @@ -2172,6 +2173,12 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet "Total number of zero gap blocks written on relation extends" ) .expect("failed to define a metric"), + clear_vm_bits_unknown: register_int_counter_vec!( + "pageserver_wal_ingest_clear_vm_bits_unknown", + "Number of ignored ClearVmBits operations due to unknown pages/relations", + &["entity"], + ) + .expect("failed to define a metric"), }); pub(crate) static WAL_REDO_TIME: Lazy = Lazy::new(|| { diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 4f42427276a8..d48a1ba117fc 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -392,7 +392,9 @@ impl Timeline { result } - // Get size of a database in blocks + /// Get size of a database in blocks. This is only accurate on shard 0. It will undercount on + /// other shards, by only accounting for relations the shard has pages for, and only accounting + /// for pages up to the highest page number it has stored. 
pub(crate) async fn get_db_size( &self, spcnode: Oid, @@ -411,7 +413,10 @@ impl Timeline { Ok(total_blocks) } - /// Get size of a relation file + /// Get size of a relation file. The relation must exist, otherwise an error is returned. + /// + /// This is only accurate on shard 0. On other shards, it will return the size up to the highest + /// page number stored in the shard. pub(crate) async fn get_rel_size( &self, tag: RelTag, @@ -447,7 +452,10 @@ impl Timeline { Ok(nblocks) } - /// Does relation exist? + /// Does the relation exist? + /// + /// Only shard 0 has a full view of the relations. Other shards only know about relations that + /// the shard stores pages for. pub(crate) async fn get_rel_exists( &self, tag: RelTag, @@ -481,6 +489,9 @@ impl Timeline { /// Get a list of all existing relations in given tablespace and database. /// + /// Only shard 0 has a full view of the relations. Other shards only know about relations that + /// the shard stores pages for. + /// /// # Cancel-Safety /// /// This method is cancellation-safe. diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index ad6ccbc85466..d568da596ab7 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -334,14 +334,32 @@ impl WalIngest { // replaying it would fail to find the previous image of the page, because // it doesn't exist. So check if the VM page(s) exist, and skip the WAL // record if it doesn't. - let vm_size = get_relsize(modification, vm_rel, ctx).await?; + // + // TODO: analyze the metrics and tighten this up accordingly. This logic + // implicitly assumes that VM pages see explicit WAL writes before + // implicit ClearVmBits, and will otherwise silently drop updates. + let Some(vm_size) = get_relsize(modification, vm_rel, ctx).await? 
else { + WAL_INGEST + .clear_vm_bits_unknown + .with_label_values(&["relation"]) + .inc(); + return Ok(()); + }; if let Some(blknum) = new_vm_blk { if blknum >= vm_size { + WAL_INGEST + .clear_vm_bits_unknown + .with_label_values(&["new_page"]) + .inc(); new_vm_blk = None; } } if let Some(blknum) = old_vm_blk { if blknum >= vm_size { + WAL_INGEST + .clear_vm_bits_unknown + .with_label_values(&["old_page"]) + .inc(); old_vm_blk = None; } } @@ -572,7 +590,8 @@ impl WalIngest { modification.put_rel_page_image_zero(rel, fsm_physical_page_no)?; fsm_physical_page_no += 1; } - let nblocks = get_relsize(modification, rel, ctx).await?; + // TODO: re-examine the None case here wrt. sharding; should we error? + let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0); if nblocks > fsm_physical_page_no { // check if something to do: FSM is larger than truncate position self.put_rel_truncation(modification, rel, fsm_physical_page_no, ctx) @@ -612,7 +631,8 @@ impl WalIngest { )?; vm_page_no += 1; } - let nblocks = get_relsize(modification, rel, ctx).await?; + // TODO: re-examine the None case here wrt. sharding; should we error? + let nblocks = get_relsize(modification, rel, ctx).await?.unwrap_or(0); if nblocks > vm_page_no { // check if something to do: VM is larger than truncate position self.put_rel_truncation(modification, rel, vm_page_no, ctx) @@ -1430,24 +1450,27 @@ impl WalIngest { } } +/// Returns the size of the relation as of this modification, or None if the relation doesn't exist. +/// +/// This is only accurate on shard 0. On other shards, it will return the size up to the highest +/// page number stored in the shard, or None if the shard does not have any pages for it. async fn get_relsize( modification: &DatadirModification<'_>, rel: RelTag, ctx: &RequestContext, -) -> Result<BlockNumber, PageReconstructError> { - let nblocks = if !modification +) -> Result<Option<BlockNumber>, PageReconstructError> { + if !modification .tline .get_rel_exists(rel, Version::Modified(modification), ctx) .await? 
{ - 0 - } else { - modification - .tline - .get_rel_size(rel, Version::Modified(modification), ctx) - .await? - }; - Ok(nblocks) + return Ok(None); + } + modification + .tline + .get_rel_size(rel, Version::Modified(modification), ctx) + .await + .map(Some) } #[allow(clippy::bool_assert_comparison)]