diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 0c911939e848e..adf492ace762c 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1365,7 +1365,7 @@ impl Tenant { initdb_lsn: Lsn, pg_version: u32, ctx: &RequestContext, - delta_layer_desc: Vec>, + delta_layer_desc: Vec, image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { @@ -2933,7 +2933,7 @@ impl Tenant { dst_id: TimelineId, ancestor_lsn: Option, ctx: &RequestContext, - delta_layer_desc: Vec>, + delta_layer_desc: Vec, image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, ) -> anyhow::Result> { @@ -3933,7 +3933,7 @@ mod tests { use storage_layer::PersistentLayerKey; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; - use timeline::GcInfo; + use timeline::{DeltaLayerTestDesc, GcInfo}; use utils::bin_ser::BeSer; use utils::id::TenantId; @@ -6229,27 +6229,6 @@ mod tests { .await .unwrap(); - async fn get_vectored_impl_wrapper( - tline: &Arc, - key: Key, - lsn: Lsn, - ctx: &RequestContext, - ) -> Result, GetVectoredError> { - let mut reconstruct_state = ValuesReconstructState::new(); - let mut res = tline - .get_vectored_impl( - KeySpace::single(key..key.next()), - lsn, - &mut reconstruct_state, - ctx, - ) - .await?; - Ok(res.pop_last().map(|(k, v)| { - assert_eq!(k, key); - v.unwrap() - })) - } - let lsn = Lsn(0x30); // test vectored get on parent timeline @@ -6325,27 +6304,6 @@ mod tests { .await .unwrap(); - async fn get_vectored_impl_wrapper( - tline: &Arc, - key: Key, - lsn: Lsn, - ctx: &RequestContext, - ) -> Result, GetVectoredError> { - let mut reconstruct_state = ValuesReconstructState::new(); - let mut res = tline - .get_vectored_impl( - KeySpace::single(key..key.next()), - lsn, - &mut reconstruct_state, - ctx, - ) - .await?; - Ok(res.pop_last().map(|(k, v)| { - assert_eq!(k, key); - v.unwrap() - })) - } - let lsn = Lsn(0x30); // test vectored get on parent timeline @@ -6421,9 +6379,18 @@ mod tests { &ctx, // delta layers vec![ - vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], - vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], - vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x10)..Lsn(0x20), + vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], + ), ], // image layers vec![ @@ -6489,17 +6456,29 @@ mod tests { &ctx, // delta layers vec![ - vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], - vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], - vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], - vec![ - (key0, Lsn(0x30), Value::Image(test_img("metadata key 0"))), - (key3, Lsn(0x30), Value::Image(test_img("metadata key 3"))), - ], + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x10)..Lsn(0x20), + vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x30)..Lsn(0x40), + vec![ + (key0, Lsn(0x30), Value::Image(test_img("metadata key 0"))), + (key3, Lsn(0x30), Value::Image(test_img("metadata key 3"))), + ], + ), ], // image layers vec![(Lsn(0x10), vec![(key1, test_img("metadata key 1"))])], - Lsn(0x30), + Lsn(0x40), ) .await .unwrap(); @@ -6522,7 +6501,7 @@ mod tests { // Image layers are created at last_record_lsn let images = tline - .inspect_image_layers(Lsn(0x30), &ctx) + .inspect_image_layers(Lsn(0x40), &ctx) .await .unwrap() .into_iter() @@ -6548,9 +6527,18 @@ mod tests { &ctx, // delta layers vec![ - vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], - vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], - vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x10)..Lsn(0x20), + vec![(key2, Lsn(0x10), Value::Image(test_img("metadata key 2")))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key1, Lsn(0x20), Value::Image(Bytes::new()))], + ), + DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x20)..Lsn(0x30), + vec![(key2, Lsn(0x20), Value::Image(Bytes::new()))], + ), ], // image layers vec![(Lsn(0x10), vec![(key1, test_img("metadata key 1"))])], @@ -6598,15 +6586,21 @@ mod tests { key } - // We create one bottom-most image layer, a delta layer D1 crossing the GC horizon, D2 below the horizon, and D3 above the horizon. + // We create + // - one bottom-most image layer, + // - a delta layer D1 crossing the GC horizon with data below and above the horizon, + // - a delta layer D2 crossing the GC horizon with data only below the horizon, + // - a delta layer D3 above the horizon. // - // | D1 | | D3 | + // | D3 | + // | D1 | // -| |-- gc horizon ----------------- // | | | D2 | // --------- img layer ------------------ // // What we should expact from this compaction is: - // | Part of D1 | | D3 | + // | D3 | + // | Part of D1 | // --------- img layer with D1+D2 at GC horizon------------------ // img layer at 0x10 @@ -6646,13 +6640,13 @@ mod tests { let delta3 = vec![ ( get_key(8), - Lsn(0x40), - Value::Image(Bytes::from("value 8@0x40")), + Lsn(0x48), + Value::Image(Bytes::from("value 8@0x48")), ), ( get_key(9), - Lsn(0x40), - Value::Image(Bytes::from("value 9@0x40")), + Lsn(0x48), + Value::Image(Bytes::from("value 9@0x48")), ), ]; @@ -6662,7 +6656,11 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, - vec![delta1, delta2, delta3], // delta layers + vec![ + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3), + ], // delta layers vec![(Lsn(0x10), img_layer)], // image layers Lsn(0x50), ) @@ -6683,8 +6681,8 @@ mod tests { Bytes::from_static(b"value 5@0x20"), Bytes::from_static(b"value 6@0x20"), Bytes::from_static(b"value 7@0x10"), - Bytes::from_static(b"value 8@0x40"), - Bytes::from_static(b"value 9@0x40"), + Bytes::from_static(b"value 8@0x48"), + Bytes::from_static(b"value 9@0x48"), ]; for (idx, expected) in expected_result.iter().enumerate() { @@ -6772,10 +6770,10 @@ mod tests { lsn_range: Lsn(0x30)..Lsn(0x41), is_delta: true }, - // The delta layer we created and should not be picked for the compaction + // The delta3 layer that should not be picked for the compaction PersistentLayerKey { key_range: get_key(8)..get_key(10), - lsn_range: Lsn(0x40)..Lsn(0x41), + lsn_range: Lsn(0x48)..Lsn(0x50), is_delta: true } ] @@ -6839,7 +6837,10 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, - vec![delta1], // delta layers + vec![DeltaLayerTestDesc::new_with_inferred_key_range( + Lsn(0x10)..Lsn(0x40), + delta1, + )], // delta layers vec![(Lsn(0x10), image1)], // image layers Lsn(0x50), ) @@ -6963,15 +6964,21 @@ mod tests { key } - // We create one bottom-most image layer, a delta layer D1 crossing the GC horizon, D2 below the horizon, and D3 above the horizon. + // We create + // - one bottom-most image layer, + // - a delta layer D1 crossing the GC horizon with data below and above the horizon, + // - a delta layer D2 crossing the GC horizon with data only below the horizon, + // - a delta layer D3 above the horizon. // - // | D1 | | D3 | + // | D3 | + // | D1 | // -| |-- gc horizon ----------------- // | | | D2 | // --------- img layer ------------------ // // What we should expact from this compaction is: - // | Part of D1 | | D3 | + // | D3 | + // | Part of D1 | // --------- img layer with D1+D2 at GC horizon------------------ // img layer at 0x10 @@ -7021,13 +7028,13 @@ mod tests { let delta3 = vec![ ( get_key(8), - Lsn(0x40), - Value::WalRecord(NeonWalRecord::wal_append("@0x40")), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), ), ( get_key(9), - Lsn(0x40), - Value::WalRecord(NeonWalRecord::wal_append("@0x40")), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), ), ]; @@ -7037,7 +7044,11 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, - vec![delta1, delta2, delta3], // delta layers + vec![ + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta1), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta2), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3), + ], // delta layers vec![(Lsn(0x10), img_layer)], // image layers Lsn(0x50), ) @@ -7064,8 +7075,8 @@ mod tests { Bytes::from_static(b"value 5@0x10@0x20"), Bytes::from_static(b"value 6@0x10@0x20"), Bytes::from_static(b"value 7@0x10"), - Bytes::from_static(b"value 8@0x10@0x40"), - Bytes::from_static(b"value 9@0x10@0x40"), + Bytes::from_static(b"value 8@0x10@0x48"), + Bytes::from_static(b"value 9@0x10@0x48"), ]; let expected_result_at_gc_horizon = [ diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index de9361d72103f..df4d252ad21e9 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4735,6 +4735,42 @@ impl DurationRecorder { } } +/// Descriptor for a delta layer used in testing infra. The start/end key/lsn range of the +/// delta layer might be different from the min/max key/lsn in the delta layer. Therefore, +/// the layer descriptor requires the user to provide the ranges, which should cover all +/// keys specified in the `data` field. +#[cfg(test)] +pub struct DeltaLayerTestDesc { + pub lsn_range: Range, + pub key_range: Range, + pub data: Vec<(Key, Lsn, Value)>, +} + +#[cfg(test)] +impl DeltaLayerTestDesc { + #[allow(dead_code)] + pub fn new(lsn_range: Range, key_range: Range, data: Vec<(Key, Lsn, Value)>) -> Self { + Self { + lsn_range, + key_range, + data, + } + } + + pub fn new_with_inferred_key_range( + lsn_range: Range, + data: Vec<(Key, Lsn, Value)>, + ) -> Self { + let key_min = data.iter().map(|(key, _, _)| key).min().unwrap(); + let key_max = data.iter().map(|(key, _, _)| key).max().unwrap(); + Self { + key_range: (*key_min)..(key_max.next()), + lsn_range, + data, + } + } +} + impl Timeline { async fn finish_compact_batch( self: &Arc, @@ -5535,37 +5571,65 @@ impl Timeline { #[cfg(test)] pub(super) async fn force_create_delta_layer( self: &Arc, - mut deltas: Vec<(Key, Lsn, Value)>, + mut deltas: DeltaLayerTestDesc, check_start_lsn: Option, ctx: &RequestContext, ) -> anyhow::Result<()> { let last_record_lsn = self.get_last_record_lsn(); - deltas.sort_unstable_by(|(ka, la, _), (kb, lb, _)| (ka, la).cmp(&(kb, lb))); - let min_key = *deltas.first().map(|(k, _, _)| k).unwrap(); - let end_key = deltas.last().map(|(k, _, _)| k).unwrap().next(); - let min_lsn = *deltas.iter().map(|(_, lsn, _)| lsn).min().unwrap(); - let max_lsn = *deltas.iter().map(|(_, lsn, _)| lsn).max().unwrap(); + deltas + .data + .sort_unstable_by(|(ka, la, _), (kb, lb, _)| (ka, la).cmp(&(kb, lb))); + assert!(deltas.data.first().unwrap().0 >= deltas.key_range.start); + assert!(deltas.data.last().unwrap().0 < deltas.key_range.end); + for (_, lsn, _) in &deltas.data { + assert!(deltas.lsn_range.start <= *lsn && *lsn < deltas.lsn_range.end); + } assert!( - max_lsn <= last_record_lsn, - "advance last record lsn before inserting a layer, max_lsn={max_lsn}, last_record_lsn={last_record_lsn}" + deltas.lsn_range.end <= last_record_lsn, + "advance last record lsn before inserting a layer, end_lsn={}, last_record_lsn={}", + deltas.lsn_range.end, + last_record_lsn ); - let end_lsn = Lsn(max_lsn.0 + 1); if let Some(check_start_lsn) = check_start_lsn { - assert!(min_lsn >= check_start_lsn); + assert!(deltas.lsn_range.start >= check_start_lsn); + } + // check if the delta layer does not violate the LSN invariant, the legacy compaction should always produce a batch of + // layers of the same start/end LSN, and so should the force inserted layer + { + /// Checks if a overlaps with b, assume a/b = [start, end). + pub fn overlaps_with(a: &Range, b: &Range) -> bool { + !(a.end <= b.start || b.end <= a.start) + } + + let guard = self.layers.read().await; + for layer in guard.layer_map().iter_historic_layers() { + if layer.is_delta() + && overlaps_with(&layer.lsn_range, &deltas.lsn_range) + && layer.lsn_range != deltas.lsn_range + { + // If a delta layer overlaps with another delta layer AND their LSN range is not the same, panic + panic!( + "inserted layer violates delta layer LSN invariant: current_lsn_range={}..{}, conflict_lsn_range={}..{}", + deltas.lsn_range.start, deltas.lsn_range.end, layer.lsn_range.start, layer.lsn_range.end + ); + } + } } let mut delta_layer_writer = DeltaLayerWriter::new( self.conf, self.timeline_id, self.tenant_shard_id, - min_key, - min_lsn..end_lsn, + deltas.key_range.start, + deltas.lsn_range, ctx, ) .await?; - for (key, lsn, val) in deltas { + for (key, lsn, val) in deltas.data { delta_layer_writer.put_value(key, lsn, val, ctx).await?; } - let delta_layer = delta_layer_writer.finish(end_key, self, ctx).await?; + let delta_layer = delta_layer_writer + .finish(deltas.key_range.end, self, ctx) + .await?; { let mut guard = self.layers.write().await;