Skip to content

Commit

Permalink
pageserver: move things around to prepare for decoding logic (#9504)
Browse files Browse the repository at this point in the history
## Problem

We wish to have high level WAL decoding logic in `wal_decoder::decoder`
module.

## Summary of Changes

For this we need the `Value` and `NeonWalRecord` types accessible there, so:
1. Move `Value` and `NeonWalRecord` to `pageserver::value` and
`pageserver::record` respectively.
2. Get rid of `pageserver::repository` (follow up from (1))
3. Move PG specific WAL record types to `postgres_ffi::walrecord`. In
theory they could live in `wal_decoder`, but it would create a circular
dependency between `wal_decoder` and `postgres_ffi`. Long term it makes
sense for those types to be PG version specific, so that will work out nicely.
4. Move higher level WAL record types (to be ingested by pageserver)
into `wal_decoder::models`

Related: #9335
Epic: #9329
  • Loading branch information
VladLazar authored Oct 29, 2024
1 parent 62f5d48 commit 07b9744
Show file tree
Hide file tree
Showing 45 changed files with 1,024 additions and 905 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ members = [
"libs/postgres_ffi/wal_craft",
"libs/vm_monitor",
"libs/walproposer",
"libs/wal_decoder",
]

[workspace.package]
Expand Down Expand Up @@ -238,6 +239,7 @@ tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
walproposer = { version = "0.1", path = "./libs/walproposer/" }
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }

## Common library dependency
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
Expand Down
2 changes: 2 additions & 0 deletions libs/pageserver_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ pub mod controller_api;
pub mod key;
pub mod keyspace;
pub mod models;
pub mod record;
pub mod reltag;
pub mod shard;
/// Public API types
pub mod upcall_api;
pub mod value;

pub mod config;
113 changes: 113 additions & 0 deletions libs/pageserver_api/src/record.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//! This module defines the WAL record format used within the pageserver.
use bytes::Bytes;
use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember};
use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId};
use serde::{Deserialize, Serialize};
use utils::bin_ser::DeserializeError;

/// Each update to a page is represented by a NeonWalRecord. It can be a wrapper
/// around a PostgreSQL WAL record, or a custom neon-specific "record".
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum NeonWalRecord {
/// Native PostgreSQL WAL record
Postgres { will_init: bool, rec: Bytes },

/// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear)
ClearVisibilityMapFlags {
new_heap_blkno: Option<u32>,
old_heap_blkno: Option<u32>,
flags: u8,
},
/// Mark transaction IDs as committed on a CLOG page
ClogSetCommitted {
xids: Vec<TransactionId>,
timestamp: TimestampTz,
},
/// Mark transaction IDs as aborted on a CLOG page
ClogSetAborted { xids: Vec<TransactionId> },
/// Extend multixact offsets SLRU
MultixactOffsetCreate {
mid: MultiXactId,
moff: MultiXactOffset,
},
/// Extend multixact members SLRU.
MultixactMembersCreate {
moff: MultiXactOffset,
members: Vec<MultiXactMember>,
},
/// Update the map of AUX files, either writing or dropping an entry
AuxFile {
file_path: String,
content: Option<Bytes>,
},

/// A testing record for unit testing purposes. It supports append data to an existing image, or clear it.
#[cfg(feature = "testing")]
Test {
/// Append a string to the image.
append: String,
/// Clear the image before appending.
clear: bool,
/// Treat this record as an init record. `clear` should be set to true if this field is set
/// to true. This record does not need the history WALs to reconstruct. See [`NeonWalRecord::will_init`] and
/// its references in `timeline.rs`.
will_init: bool,
},
}

impl NeonWalRecord {
/// Does replaying this WAL record initialize the page from scratch, or does
/// it need to be applied over the previous image of the page?
pub fn will_init(&self) -> bool {
// If you change this function, you'll also need to change ValueBytes::will_init
match self {
NeonWalRecord::Postgres { will_init, rec: _ } => *will_init,
#[cfg(feature = "testing")]
NeonWalRecord::Test { will_init, .. } => *will_init,
// None of the special neon record types currently initialize the page
_ => false,
}
}

#[cfg(feature = "testing")]
pub fn wal_append(s: impl AsRef<str>) -> Self {
Self::Test {
append: s.as_ref().to_string(),
clear: false,
will_init: false,
}
}

#[cfg(feature = "testing")]
pub fn wal_clear() -> Self {
Self::Test {
append: "".to_string(),
clear: true,
will_init: false,
}
}

#[cfg(feature = "testing")]
pub fn wal_init() -> Self {
Self::Test {
append: "".to_string(),
clear: true,
will_init: true,
}
}
}

/// Build a human-readable string to describe a WAL record
///
/// For debugging purposes
pub fn describe_wal_record(rec: &NeonWalRecord) -> Result<String, DeserializeError> {
match rec {
NeonWalRecord::Postgres { will_init, rec } => Ok(format!(
"will_init: {}, {}",
will_init,
describe_postgres_wal_record(rec)?
)),
_ => Ok(format!("{:?}", rec)),
}
}
80 changes: 17 additions & 63 deletions pageserver/src/repository.rs → libs/pageserver_api/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use crate::walrecord::NeonWalRecord;
use anyhow::Result;
//! This module defines the value type used by the storage engine.
//!
//! A [`Value`] represents either a completely new value for one Key ([`Value::Image`]),
//! or a "delta" of how to get from previous version of the value to the new one
//! ([`Value::WalRecord`]])
//!
//! Note that the [`Value`] type is used for the permananent storage format, so any
//! changes to it must be backwards compatible.
use crate::record::NeonWalRecord;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::ops::AddAssign;
use std::time::Duration;

pub use pageserver_api::key::{Key, KEY_SIZE};

/// A 'value' stored for a one Key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum Value {
/// An Image value contains a full copy of the value
Expand All @@ -20,10 +23,12 @@ pub enum Value {
}

impl Value {
#[inline(always)]
pub fn is_image(&self) -> bool {
matches!(self, Value::Image(_))
}

#[inline(always)]
pub fn will_init(&self) -> bool {
match self {
Value::Image(_) => true,
Expand All @@ -33,17 +38,18 @@ impl Value {
}

#[derive(Debug, PartialEq)]
pub(crate) enum InvalidInput {
pub enum InvalidInput {
TooShortValue,
TooShortPostgresRecord,
}

/// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets
/// use this type for querying if a slice looks some particular way.
pub(crate) struct ValueBytes;
pub struct ValueBytes;

impl ValueBytes {
pub(crate) fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
#[inline(always)]
pub fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
if raw.len() < 12 {
return Err(InvalidInput::TooShortValue);
}
Expand Down Expand Up @@ -79,6 +85,7 @@ impl ValueBytes {
mod test {
use super::*;

use bytes::Bytes;
use utils::bin_ser::BeSer;

macro_rules! roundtrip {
Expand Down Expand Up @@ -229,56 +236,3 @@ mod test {
assert!(!ValueBytes::will_init(&expected).unwrap());
}
}

///
/// Result of performing GC
///
#[derive(Default, Serialize, Debug)]
pub struct GcResult {
pub layers_total: u64,
pub layers_needed_by_cutoff: u64,
pub layers_needed_by_pitr: u64,
pub layers_needed_by_branches: u64,
pub layers_needed_by_leases: u64,
pub layers_not_updated: u64,
pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files.

#[serde(serialize_with = "serialize_duration_as_millis")]
pub elapsed: Duration,

/// The layers which were garbage collected.
///
/// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be
/// dropped in tests.
#[cfg(feature = "testing")]
#[serde(skip)]
pub(crate) doomed_layers: Vec<crate::tenant::storage_layer::Layer>,
}

// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds
fn serialize_duration_as_millis<S>(d: &Duration, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
d.as_millis().serialize(serializer)
}

impl AddAssign for GcResult {
fn add_assign(&mut self, other: Self) {
self.layers_total += other.layers_total;
self.layers_needed_by_pitr += other.layers_needed_by_pitr;
self.layers_needed_by_cutoff += other.layers_needed_by_cutoff;
self.layers_needed_by_branches += other.layers_needed_by_branches;
self.layers_needed_by_leases += other.layers_needed_by_leases;
self.layers_not_updated += other.layers_not_updated;
self.layers_removed += other.layers_removed;

self.elapsed += other.elapsed;

#[cfg(feature = "testing")]
{
let mut other = other;
self.doomed_layers.append(&mut other.doomed_layers);
}
}
}
1 change: 1 addition & 0 deletions libs/postgres_ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ memoffset.workspace = true
thiserror.workspace = true
serde.workspace = true
utils.workspace = true
tracing.workspace = true

[dev-dependencies]
env_logger.workspace = true
Expand Down
1 change: 1 addition & 0 deletions libs/postgres_ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ macro_rules! enum_pgversion {

pub mod pg_constants;
pub mod relfile_utils;
pub mod walrecord;

// Export some widely used datatypes that are unlikely to change across Postgres versions
pub use v14::bindings::RepOriginId;
Expand Down
Loading

1 comment on commit 07b9744

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5362 tests run: 5129 passed, 1 failed, 232 skipped (full report)


Failures on Postgres 16

  • test_compaction_l0_memory[github-actions-selfhosted]: release-x86-64
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_compaction_l0_memory[release-pg16-github-actions-selfhosted]"

Code coverage* (full report)

  • functions: 31.3% (7682 of 24572 functions)
  • lines: 48.7% (60384 of 123980 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
07b9744 at 2024-10-29T11:33:31.314Z :recycle:

Please sign in to comment.