Skip to content

Commit

Permalink
Require LSN on branch creation internally (partial fix #2063)
Browse files Browse the repository at this point in the history
This commit cleans up our internal APIs and leaves fewer places where one can forgot
to explicitly specify the LSN to branch from.

* `neon_local branch` still works without `--ancestor-start-lsn` and the user may bump
  into a race condition when branching right after SQL query is completed. We should
  query safekeepers for the most recent `commit_lsn` instead, just like in cloud.
* `TimelineCreateRequest` HTTP API for Pageserver now takes requires both `ancestor_timeline_id`
  and `ancestor_start_lsn` to be simultaneously present or absent. It was allowed to omit start LSN
  before.
  • Loading branch information
yeputons committed Dec 17, 2022
1 parent 61194ab commit a37052b
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 70 deletions.
27 changes: 20 additions & 7 deletions control_plane/src/bin/neon_local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use control_plane::local_env::LocalEnv;
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
use control_plane::{broker, local_env};
use pageserver_api::models::TimelineInfo;
use pageserver_api::models::{TimelineAncestor, TimelineInfo};
use pageserver_api::{
DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
Expand Down Expand Up @@ -373,7 +373,6 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
new_tenant_id,
new_timeline_id,
None,
None,
Some(pg_version),
)?;
let new_timeline_id = timeline_info.timeline_id;
Expand Down Expand Up @@ -428,7 +427,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
.context("Failed to parse postgres version from the argument string")?;

let timeline_info =
pageserver.timeline_create(tenant_id, None, None, None, Some(pg_version))?;
pageserver.timeline_create(tenant_id, None, None, Some(pg_version))?;
let new_timeline_id = timeline_info.timeline_id;

let last_record_lsn = timeline_info.last_record_lsn;
Expand Down Expand Up @@ -495,16 +494,30 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
})?;

let start_lsn = branch_match
let start_lsn = match branch_match
.get_one::<String>("ancestor-start-lsn")
.map(|lsn_str| Lsn::from_str(lsn_str))
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
.context("Failed to parse ancestor start Lsn from the request")?
{
Some(x) => x,
None => {
// Although race condition is possible here, we do not warn about it for clearer output.
// The exact LSN of the new branch will be printed below. Still, may not help the user:
// https://github.com/neondatabase/neon/issues/2063
pageserver
.timeline_info(tenant_id, ancestor_timeline_id)
.unwrap()
.last_record_lsn
}
};
let timeline_info = pageserver.timeline_create(
tenant_id,
None,
start_lsn,
Some(ancestor_timeline_id),
Some(TimelineAncestor {
timeline_id: ancestor_timeline_id,
start_lsn,
}),
None,
)?;
let new_timeline_id = timeline_info.timeline_id;
Expand Down
36 changes: 24 additions & 12 deletions control_plane/src/pageserver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use std::{io, result};

use anyhow::{bail, ensure, Context};
use pageserver_api::models::{
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineAncestor, TimelineCreateRequest,
TimelineInfo,
};
use postgres_connection::{parse_host_port, PgConnectionConfig};
use reqwest::blocking::{Client, RequestBuilder, Response};
Expand Down Expand Up @@ -199,13 +200,8 @@ impl PageServerNode {
pg_version: u32,
) -> anyhow::Result<TimelineId> {
let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
let initial_timeline_info = self.timeline_create(
initial_tenant_id,
new_timeline_id,
None,
None,
Some(pg_version),
)?;
let initial_timeline_info =
self.timeline_create(initial_tenant_id, new_timeline_id, None, Some(pg_version))?;
Ok(initial_timeline_info.timeline_id)
}

Expand Down Expand Up @@ -507,12 +503,28 @@ impl PageServerNode {
Ok(timeline_infos)
}

pub fn timeline_info(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
) -> anyhow::Result<TimelineInfo> {
self.http_request(
Method::GET,
format!("{}/tenant/{}/timeline/{}", self.http_base_url, tenant_id, timeline_id),
)
.send()?
.error_from_body()?
.json::<TimelineInfo>()
.with_context(|| {
format!("Failed to retrieve or parse timeline info response for tenant {tenant_id} and timeline {timeline_id}")
})
}

pub fn timeline_create(
&self,
tenant_id: TenantId,
new_timeline_id: Option<TimelineId>,
ancestor_start_lsn: Option<Lsn>,
ancestor_timeline_id: Option<TimelineId>,
ancestor: Option<TimelineAncestor>,
pg_version: Option<u32>,
) -> anyhow::Result<TimelineInfo> {
self.http_request(
Expand All @@ -521,8 +533,8 @@ impl PageServerNode {
)
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,
ancestor_timeline_id: ancestor.as_ref().map(|x| x.timeline_id),
ancestor_start_lsn: ancestor.map(|x| x.start_lsn),
pg_version,
})
.send()?
Expand Down
33 changes: 32 additions & 1 deletion libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,52 @@ pub enum TimelineState {
Broken,
}

// It is impossible to specify timeline as an ancestor without also
// specifying the branching LSN.
#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TimelineAncestor {
pub timeline_id: TimelineId,
pub start_lsn: Lsn,
}

#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest {
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub new_timeline_id: Option<TimelineId>,
#[serde(default)]

// TODO: replace with Option<TimelineAncestor>. It does not make sense
// to specify LSN without timeline, and specifying timeline without LSN
// invokes leads to a race condition.
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>,
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_start_lsn: Option<Lsn>,

pub pg_version: Option<u32>,
}

impl TimelineCreateRequest {
pub fn ancestor(&self) -> anyhow::Result<Option<TimelineAncestor>> {
match (self.ancestor_timeline_id, self.ancestor_start_lsn) {
(None, None) => Ok(None),
(Some(timeline_id), Some(start_lsn)) => Ok(Some(TimelineAncestor {
timeline_id,
start_lsn,
})),
(Some(_), None) => {
bail!("ancestor_start_lsn is not specified, this is prone to race conditions")
}
(None, Some(_)) => {
bail!("ancestor_start_lsn is specified without ancestor_timeline_id")
}
}
}
}

#[serde_as]
#[derive(Serialize, Deserialize, Default)]
pub struct TenantCreateRequest {
Expand Down
1 change: 1 addition & 0 deletions pageserver/src/http/openapi_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ paths:
Create a timeline. Returns new timeline id on success.\
If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
If either of ancestor_timeline_id or ancestor_start_lsn is specified, both should be specified.
requestBody:
content:
application/json:
Expand Down
7 changes: 4 additions & 3 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,14 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
let tenant = tenant_mgr::get_tenant(tenant_id, true)
.await
.map_err(ApiError::NotFound)?;
let ancestor = request_data.ancestor().map_err(ApiError::BadRequest)?;
let lsn = ancestor.as_ref().map(|x| x.start_lsn);
match tenant.create_timeline(
new_timeline_id,
request_data.ancestor_timeline_id.map(TimelineId::from),
request_data.ancestor_start_lsn,
ancestor,
request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION)
)
.instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
.instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?lsn, pg_version=?request_data.pg_version))
.await {
Ok(Some(new_timeline)) => {
// Created. Construct a TimelineInfo for it.
Expand Down
76 changes: 37 additions & 39 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
use anyhow::{bail, Context};
use bytes::Bytes;
use futures::Stream;
use pageserver_api::models::TimelineState;
use pageserver_api::models::{TimelineAncestor, TimelineState};
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use tokio::sync::watch;
Expand Down Expand Up @@ -1096,11 +1096,13 @@ impl Tenant {
/// If the caller specified the timeline ID to use (`new_timeline_id`), and timeline with
/// the same timeline ID already exists, returns None. If `new_timeline_id` is not given,
/// a new unique ID is generated.
///
/// `ancestor_start_lsn` should always be specified, because there is a race between branch
/// creation and updating the ancestor. The caller should take care of it.
pub async fn create_timeline(
&self,
new_timeline_id: TimelineId,
ancestor_timeline_id: Option<TimelineId>,
mut ancestor_start_lsn: Option<Lsn>,
ancestor: Option<TimelineAncestor>,
pg_version: u32,
) -> anyhow::Result<Option<Arc<Timeline>>> {
anyhow::ensure!(
Expand All @@ -1113,36 +1115,36 @@ impl Tenant {
return Ok(None);
}

let loaded_timeline = match ancestor_timeline_id {
Some(ancestor_timeline_id) => {
let loaded_timeline = match ancestor {
Some(mut ancestor) => {
let ancestor_timeline = self
.get_timeline(ancestor_timeline_id, false)
.get_timeline(ancestor.timeline_id, false)
.context("Cannot branch off the timeline that's not present in pageserver")?;

if let Some(lsn) = ancestor_start_lsn.as_mut() {
*lsn = lsn.align();
ancestor.start_lsn = ancestor.start_lsn.align();

{
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
if ancestor_ancestor_lsn > *lsn {
if ancestor_ancestor_lsn > ancestor.start_lsn {
// can we safely just branch from the ancestor instead?
bail!(
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
lsn,
ancestor_timeline_id,
ancestor_ancestor_lsn,
);
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
ancestor.start_lsn,
ancestor.timeline_id,
ancestor_ancestor_lsn,
);
}

// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
ancestor_timeline.wait_lsn(*lsn).await?;
}

self.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?
// Wait for the WAL to arrive and be processed on the parent branch up
// to the requested branch point. The repository code itself doesn't
// require it, but if we start to receive WAL on the new timeline,
// decoding the new WAL might need to look up previous pages, relation
// sizes etc. and that would get confused if the previous page versions
// are not in the repository yet.
ancestor_timeline.wait_lsn(ancestor.start_lsn).await?;

self.branch_timeline(ancestor.timeline_id, new_timeline_id, ancestor.start_lsn)?
}
None => self.bootstrap_timeline(new_timeline_id, pg_version).await?,
};
Expand Down Expand Up @@ -1910,11 +1912,14 @@ impl Tenant {
}

/// Branch an existing timeline
///
/// `start_lsn` should always be specified, because race condition between get_last_record_lsn() and
/// branch creation is inevitable.
fn branch_timeline(
&self,
src: TimelineId,
dst: TimelineId,
start_lsn: Option<Lsn>,
start_lsn: Lsn,
) -> anyhow::Result<Arc<Timeline>> {
// We need to hold this lock to prevent GC from starting at the same time. GC scans the directory to learn
// about timelines, so otherwise a race condition is possible, where we create new timeline and GC
Expand Down Expand Up @@ -1942,13 +1947,6 @@ impl Tenant {

let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();

// If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN
let start_lsn = start_lsn.unwrap_or_else(|| {
let lsn = src_timeline.get_last_record_lsn();
info!("branching timeline {dst} from timeline {src} at last record LSN: {lsn}");
lsn
});

// Check if the starting LSN is out of scope because it is less than
// 1. the latest GC cutoff LSN or
// 2. the planned GC cutoff LSN, which is from an in-queue GC iteration.
Expand Down Expand Up @@ -2779,7 +2777,7 @@ mod tests {
//assert_current_logical_size(&tline, Lsn(0x40));

// Branch the history, modify relation differently on the new timeline
tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?;
tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
let newtline = tenant
.get_timeline(NEW_TIMELINE_ID, true)
.expect("Should have a local timeline");
Expand Down Expand Up @@ -2867,7 +2865,7 @@ mod tests {
.await?;

// try to branch at lsn 25, should fail because we already garbage collected the data
match tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
match tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(err.to_string().contains("invalid branch start lsn"));
Expand All @@ -2892,7 +2890,7 @@ mod tests {
.create_empty_timeline(TIMELINE_ID, Lsn(0x50), DEFAULT_PG_VERSION)?
.initialize()?;
// try to branch at lsn 0x25, should fail because initdb lsn is 0x50
match tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
match tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
Ok(_) => panic!("branching should have failed"),
Err(err) => {
assert!(&err.to_string().contains("invalid branch start lsn"));
Expand Down Expand Up @@ -2940,7 +2938,7 @@ mod tests {
.initialize()?;
make_some_layers(tline.as_ref(), Lsn(0x20)).await?;

tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
let newtline = tenant
.get_timeline(NEW_TIMELINE_ID, true)
.expect("Should have a local timeline");
Expand All @@ -2962,7 +2960,7 @@ mod tests {
.initialize()?;
make_some_layers(tline.as_ref(), Lsn(0x20)).await?;

tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
let newtline = tenant
.get_timeline(NEW_TIMELINE_ID, true)
.expect("Should have a local timeline");
Expand Down Expand Up @@ -3018,7 +3016,7 @@ mod tests {
make_some_layers(tline.as_ref(), Lsn(0x20)).await?;
tline.checkpoint(CheckpointConfig::Forced).await?;

tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
tenant.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;

let newtline = tenant
.get_timeline(NEW_TIMELINE_ID, true)
Expand Down Expand Up @@ -3291,7 +3289,7 @@ mod tests {
let mut tline_id = TIMELINE_ID;
for _ in 0..50 {
let new_tline_id = TimelineId::generate();
tenant.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
tenant.branch_timeline(tline_id, new_tline_id, lsn)?;
tline = tenant
.get_timeline(new_tline_id, true)
.expect("Should have the branched timeline");
Expand Down Expand Up @@ -3356,7 +3354,7 @@ mod tests {
#[allow(clippy::needless_range_loop)]
for idx in 0..NUM_TLINES {
let new_tline_id = TimelineId::generate();
tenant.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
tenant.branch_timeline(tline_id, new_tline_id, lsn)?;
tline = tenant
.get_timeline(new_tline_id, true)
.expect("Should have the branched timeline");
Expand Down
Loading

0 comments on commit a37052b

Please sign in to comment.