From 53500c0c4a424573346ea58426c8eeb460603fd5 Mon Sep 17 00:00:00 2001
From: Tiago Castro
Date: Tue, 11 Jun 2024 12:06:01 +0100
Subject: [PATCH] fix: don't add replica to nexus which can't rebuild properly

Check if all nodes have the rebuild fix for the following operations:
- increase replica count on volume
- move replica
- add replica to nexus
- take snapshot on nr volume

Checking all nodes might lead to some false positives, and to some issues
when nodes have been powered off but not removed from the pstor, but it's
a very simple way of ensuring all nodes are fixed.
A potential improvement may be to check if only the replica nodes have the
fix (see the sketch after the diff).

Signed-off-by: Tiago Castro
---
 .../src/bin/core/nexus/operations_helper.rs   |   7 +
 .../agents/src/bin/core/node/registry.rs      |  13 +-
 .../agents/src/bin/core/node/specs.rs         |   1 -
 .../src/bin/core/tests/volume/snapshot.rs     | 131 +++++++++++++++++-
 .../agents/src/bin/core/volume/operations.rs  |   4 +-
 .../src/bin/core/volume/operations_helper.rs  |  16 ++-
 .../bin/core/volume/snapshot_operations.rs    |   4 +
 .../agents/src/bin/core/volume/specs.rs       |   2 +
 control-plane/agents/src/common/errors.rs     |   9 ++
 .../stor-port/src/types/v0/store/nexus.rs     |   6 +
 .../src/types/v0/store/nexus_child.rs         |   9 +-
 .../stor-port/src/types/v0/store/volume.rs    |   4 +
 .../src/types/v0/transport/replica.rs         |  11 +-
 utils/deployer-cluster/src/lib.rs             |   2 +-
 14 files changed, 209 insertions(+), 10 deletions(-)

diff --git a/control-plane/agents/src/bin/core/nexus/operations_helper.rs b/control-plane/agents/src/bin/core/nexus/operations_helper.rs
index 9d5f79212..563729fbe 100644
--- a/control-plane/agents/src/bin/core/nexus/operations_helper.rs
+++ b/control-plane/agents/src/bin/core/nexus/operations_helper.rs
@@ -27,6 +27,7 @@ impl OperationGuardArc<NexusSpec> {
         &mut self,
         registry: &Registry,
         replica: &Replica,
+        snapshots_present: bool,
     ) -> Result<(), SvcError> {
         // Adding a replica to a nexus will initiate a rebuild.
         // First check that we are able to start a rebuild.
@@ -38,6 +39,7 @@
             nexus: self.as_ref().uuid.clone(),
             replica: ReplicaUri::new(&replica.uuid, &uri),
             auto_rebuild: true,
+            snapshots_present,
         };
         self.add_replica(registry, &request).await?;
         Ok(())
     }
@@ -181,6 +183,11 @@ impl OperationGuardArc<NexusSpec> {
         request: &AddNexusReplica,
     ) -> Result<Child, SvcError> {
         let node = registry.node_wrapper(&request.node).await?;
+
+        if request.snapshots_present {
+            registry.verify_rebuild_ancestry_fix().await?;
+        }
+
         let replica = registry.specs().replica(request.replica.uuid()).await?;
         // we don't persist nexus owners to pstor anymore, instead we rebuild at startup
         replica.lock().owners.add_owner(&request.nexus);
diff --git a/control-plane/agents/src/bin/core/node/registry.rs b/control-plane/agents/src/bin/core/node/registry.rs
index 8161a20ae..35e2b9ce5 100644
--- a/control-plane/agents/src/bin/core/node/registry.rs
+++ b/control-plane/agents/src/bin/core/node/registry.rs
@@ -1,6 +1,6 @@
 use crate::controller::{registry::Registry, wrapper::NodeWrapper};
 use agents::errors::SvcError;
-use stor_port::types::v0::transport::{NodeId, NodeState, Register};
+use stor_port::types::v0::transport::{NodeBugFix, NodeId, NodeState, Register};
 use std::sync::Arc;
 use tokio::sync::RwLock;
 
@@ -68,4 +68,15 @@
             self.specs().register_node(self, request).await.ok();
         }
     }
+
+    /// Verify if all nodes have the rebuild ancestry fix and error out otherwise.
+    pub async fn verify_rebuild_ancestry_fix(&self) -> Result<(), SvcError> {
+        if !self
+            .specs()
+            .nodes_have_fix(NodeBugFix::NexusRebuildReplicaAncestry)
+        {
+            return Err(SvcError::UpgradeRequiredToRebuild {});
+        }
+        Ok(())
+    }
 }
diff --git a/control-plane/agents/src/bin/core/node/specs.rs b/control-plane/agents/src/bin/core/node/specs.rs
index d16d8b1d2..a3f1f3e20 100644
--- a/control-plane/agents/src/bin/core/node/specs.rs
+++ b/control-plane/agents/src/bin/core/node/specs.rs
@@ -103,7 +103,6 @@
     }
 
     /// Check if all nodes have the fix.
-    #[allow(dead_code)]
     pub(crate) fn nodes_have_fix(&self, fix: NodeBugFix) -> bool {
         self.read().nodes.values().all(|n| n.lock().has_fix(&fix))
     }
diff --git a/control-plane/agents/src/bin/core/tests/volume/snapshot.rs b/control-plane/agents/src/bin/core/tests/volume/snapshot.rs
index f59994daf..0655d4f3a 100644
--- a/control-plane/agents/src/bin/core/tests/volume/snapshot.rs
+++ b/control-plane/agents/src/bin/core/tests/volume/snapshot.rs
@@ -12,8 +12,8 @@ use stor_port::{
         openapi::models,
         transport::{
             CreateReplica, CreateVolume, DestroyPool, DestroyReplica, DestroyVolume, Filter,
-            PublishVolume, ReplicaId, SetVolumeReplica, SnapshotId, Volume, VolumeShareProtocol,
-            VolumeStatus,
+            NodeStatus, PublishVolume, ReplicaId, SetVolumeReplica, SnapshotId, Volume,
+            VolumeShareProtocol, VolumeStatus,
         },
     },
 };
@@ -589,6 +589,133 @@ async fn unknown_snapshot_garbage_collector() {
     assert_eq!(snaps.snapshots.len(), 3);
 }
 
+#[tokio::test]
+async fn snapshot_upgrade() {
+    let mb = 1024 * 1024;
+    let gc_period = Duration::from_millis(200);
+    let cluster = ClusterBuilder::builder()
+        .with_rest(false)
+        .with_agents(vec!["core"])
+        .with_tmpfs_pool_ix(0, 100 * mb)
+        .with_tmpfs_pool_ix(1, 100 * mb)
+        .with_cache_period("200ms")
+        .with_reconcile_period(gc_period, gc_period)
+        .with_options(|b| {
+            b.with_io_engines(2)
+                .with_idle_io_engines(1)
+                .with_io_engine_tag("v2.6.1")
+                // testing: remove me
+                .with_idle_io_engine_bin("~/git/mayastor/io-engine/target/debug/io-engine-fix")
+        })
+        .build()
+        .await
+        .unwrap();
+
+    let vol_cli = cluster.grpc_client().volume();
+
+    let volume_1 = vol_cli
+        .create(
+            &CreateVolume {
+                uuid: "1e3cf927-80c2-47a8-adf0-95c486bdd7b7".try_into().unwrap(),
+                size: 16 * mb,
+                replicas: 1,
+                thin: false,
+                ..Default::default()
+            },
+            None,
+        )
+        .await
+        .unwrap();
+
+    let volume_2 = vol_cli
+        .create(
+            &CreateVolume {
+                uuid: "1e3cf927-80c2-47a8-adf0-95c486bdd7b8".try_into().unwrap(),
+                size: 16 * mb,
+                replicas: 2,
+                thin: false,
+                ..Default::default()
+            },
+            None,
+        )
+        .await
+        .unwrap();
+
+    let volume_1 = vol_cli
+        .publish(
+            &PublishVolume {
+                uuid: volume_1.uuid().clone(),
+                share: Some(VolumeShareProtocol::Nvmf),
+                target_node: Some(cluster.node(0)),
+                ..Default::default()
+            },
+            None,
+        )
+        .await
+        .unwrap();
+
+    let _snapshot = vol_cli
+        .create_snapshot(
+            &CreateVolumeSnapshot::new(volume_1.uuid(), SnapshotId::new()),
+            None,
+        )
+        .await
+        .unwrap();
+    let error = vol_cli
+        .create_snapshot(
+            &CreateVolumeSnapshot::new(volume_2.uuid(), SnapshotId::new()),
+            None,
+        )
+        .await
+        .expect_err("Can't take nr snapshot");
+    assert_eq!(error.kind, ReplyErrorKind::FailedPrecondition);
+
+    let error = vol_cli
+        .set_replica(
+            &SetVolumeReplica {
+                uuid: volume_1.uuid().clone(),
+                replicas: 2,
+            },
+            None,
+        )
+        .await
+        .expect_err("No rebuild on older version!");
+    assert_eq!(error.kind, ReplyErrorKind::FailedPrecondition);
+
+    cluster.composer().stop(&cluster.node(0)).await.unwrap();
+    cluster
+        .wait_node_status(cluster.node(0), NodeStatus::Unknown)
+        .await
+        .unwrap();
+    cluster.composer().start(&cluster.node(2)).await.unwrap();
+    cluster
+        .wait_node_status(cluster.node(0), NodeStatus::Online)
+        .await
+        .unwrap();
+    cluster.wait_pool_online(cluster.pool(0, 0)).await.unwrap();
+
+    tokio::time::sleep(gc_period * 5).await;
+
+    let _volume = vol_cli
+        .set_replica(
+            &SetVolumeReplica {
+                uuid: volume_1.uuid().clone(),
+                replicas: 2,
+            },
+            None,
+        )
+        .await
+        .expect("After upgrade this should work!");
+
+    vol_cli
+        .create_snapshot(
+            &CreateVolumeSnapshot::new(volume_2.uuid(), SnapshotId::new()),
+            None,
+        )
+        .await
+        .expect("Now we can take nr snapshot");
+}
+
 #[tokio::test]
 #[ignore]
 async fn nr_snapshot() {
diff --git a/control-plane/agents/src/bin/core/volume/operations.rs b/control-plane/agents/src/bin/core/volume/operations.rs
index 535088af3..e430f5b12 100644
--- a/control-plane/agents/src/bin/core/volume/operations.rs
+++ b/control-plane/agents/src/bin/core/volume/operations.rs
@@ -655,7 +655,9 @@ impl ResourceReplicas for OperationGuardArc<VolumeSpec> {
             .and_then(|t| registry.specs().nexus_rsc(t.nexus()))
         {
             let mut guard = nexus_spec.operation_guard()?;
-            guard.attach_replica(registry, &new_replica).await?;
+            guard
+                .attach_replica(registry, &new_replica, self.has_snapshots())
+                .await?;
 
             if request.delete {
                 self.remove_child_replica(request.replica(), &mut guard, registry)
diff --git a/control-plane/agents/src/bin/core/volume/operations_helper.rs b/control-plane/agents/src/bin/core/volume/operations_helper.rs
index e3d11dee1..e9439dfd6 100644
--- a/control-plane/agents/src/bin/core/volume/operations_helper.rs
+++ b/control-plane/agents/src/bin/core/volume/operations_helper.rs
@@ -347,6 +347,9 @@
             .min(self.as_ref().num_replicas as usize);
         let mut nexus_children = nexus.as_ref().children.len();
 
+        if self.has_snapshots() {
+            registry.verify_rebuild_ancestry_fix().await?;
+        }
         let replicas = nexus_attach_candidates(self.as_ref(), nexus.as_ref(), registry).await?;
 
         let mut result = Ok(());
@@ -374,7 +377,10 @@
                 continue;
             }
 
-            match nexus.attach_replica(registry, replica.state()).await {
+            match nexus
+                .attach_replica(registry, replica.state(), self.has_snapshots())
+                .await
+            {
                 Ok(_) => {
                     nexus.info_span(|| {
                         let state = replica.state();
@@ -588,7 +594,9 @@
     ) -> Result<(), SvcError> {
         if let Some(target) = &self.as_ref().target() {
             let mut nexus_guard = registry.specs().nexus(target.nexus()).await?;
-            nexus_guard.attach_replica(registry, &replica).await
+            nexus_guard
+                .attach_replica(registry, &replica, self.has_snapshots())
+                .await
         } else {
             Ok(())
         }
@@ -798,4 +806,8 @@
             Ok(false)
         }
     }
+
+    pub fn has_snapshots(&self) -> bool {
+        self.as_ref().metadata.has_snapshots()
+    }
 }
diff --git a/control-plane/agents/src/bin/core/volume/snapshot_operations.rs b/control-plane/agents/src/bin/core/volume/snapshot_operations.rs
index 736d39c3a..4bb811937 100644
--- a/control-plane/agents/src/bin/core/volume/snapshot_operations.rs
+++ b/control-plane/agents/src/bin/core/volume/snapshot_operations.rs
@@ -58,6 +58,10 @@ impl ResourceSnapshotting for OperationGuardArc<VolumeSpec> {
     ) -> Result<Self::CreateOutput, SvcError> {
         let state = registry.volume_state(request.source_id()).await?;
 
+        if self.as_ref().num_replicas > 1 {
+            registry.verify_rebuild_ancestry_fix().await?;
+        }
+
         let operation = VolumeOperation::CreateSnapshot(request.uuid().clone());
         let spec_clone = self.start_update(registry, &state, operation).await?;
diff --git a/control-plane/agents/src/bin/core/volume/specs.rs b/control-plane/agents/src/bin/core/volume/specs.rs
index ac6aeb3be..c8b828b35 100644
--- a/control-plane/agents/src/bin/core/volume/specs.rs
+++ b/control-plane/agents/src/bin/core/volume/specs.rs
@@ -1060,6 +1060,8 @@ impl SpecOperationsHelper for VolumeSpec {
                 resource: ResourceKind::AffinityGroup,
                 count: *replica_count,
             })
+        } else if *replica_count > self.num_replicas && self.has_snapshots() {
+            registry.verify_rebuild_ancestry_fix().await
         } else {
             Ok(())
         }
diff --git a/control-plane/agents/src/common/errors.rs b/control-plane/agents/src/common/errors.rs
index ced9205e7..9613bb4b2 100644
--- a/control-plane/agents/src/common/errors.rs
+++ b/control-plane/agents/src/common/errors.rs
@@ -196,6 +196,8 @@
     Internal { details: String },
     #[snafu(display("Invalid Arguments"))]
     InvalidArguments {},
+    #[snafu(display("IoEngine upgrade is required to rebuild volume with snapshots"))]
+    UpgradeRequiredToRebuild {},
     #[snafu(display("Invalid {}, labels: {} ", resource_kind, labels))]
     InvalidLabel {
         labels: String,
@@ -555,6 +557,13 @@
                 extra,
             },
 
+            SvcError::UpgradeRequiredToRebuild { .. } => ReplyError {
+                kind: ReplyErrorKind::FailedPrecondition,
+                resource: ResourceKind::Unknown,
+                source,
+                extra,
+            },
+
             SvcError::NodeNotOnline { .. } => ReplyError {
                 kind: ReplyErrorKind::FailedPrecondition,
                 resource: ResourceKind::Node,
diff --git a/control-plane/stor-port/src/types/v0/store/nexus.rs b/control-plane/stor-port/src/types/v0/store/nexus.rs
index 497a34002..54069086e 100644
--- a/control-plane/stor-port/src/types/v0/store/nexus.rs
+++ b/control-plane/stor-port/src/types/v0/store/nexus.rs
@@ -166,6 +166,12 @@ impl NexusSpec {
             _ => None,
         })
     }
+    /// Get an iterator that references all the replica ids in the nexus.
+    pub fn replica_ids(&self) -> impl Iterator<Item = &ReplicaId> {
+        self.children
+            .iter()
+            .flat_map(|c| c.as_replica_ref().as_ref().map(|c| c.uuid()))
+    }
 }
 
 impl From<&NexusSpec> for CreateNexus {
diff --git a/control-plane/stor-port/src/types/v0/store/nexus_child.rs b/control-plane/stor-port/src/types/v0/store/nexus_child.rs
index a5d462967..c600f9639 100644
--- a/control-plane/stor-port/src/types/v0/store/nexus_child.rs
+++ b/control-plane/stor-port/src/types/v0/store/nexus_child.rs
@@ -16,13 +16,20 @@ pub enum NexusChild {
 }
 
 impl NexusChild {
-    /// Return Self as ReplicaUri
+    /// Return Self as ReplicaUri.
     pub fn as_replica(&self) -> Option<ReplicaUri> {
         match &self {
             NexusChild::Replica(replica) => Some(replica.clone()),
             NexusChild::Uri(_) => None,
         }
     }
+    /// Return Self as a reference to ReplicaUri.
+    pub fn as_replica_ref(&self) -> Option<&ReplicaUri> {
+        match &self {
+            NexusChild::Replica(replica) => Some(replica),
+            NexusChild::Uri(_) => None,
+        }
+    }
     /// Get the child URI
     pub fn uri(&self) -> ChildUri {
         match &self {
diff --git a/control-plane/stor-port/src/types/v0/store/volume.rs b/control-plane/stor-port/src/types/v0/store/volume.rs
index 5edfa7fb8..a8b61c8eb 100644
--- a/control-plane/stor-port/src/types/v0/store/volume.rs
+++ b/control-plane/stor-port/src/types/v0/store/volume.rs
@@ -254,6 +254,10 @@
     pub fn num_snapshots(&self) -> usize {
         self.runtime.snapshots.len()
     }
+    /// Check if there's any snapshot.
+    pub fn has_snapshots(&self) -> bool {
+        self.runtime.has_snapshots()
+    }
 }
 
 /// Volume meta information.
diff --git a/control-plane/stor-port/src/types/v0/transport/replica.rs b/control-plane/stor-port/src/types/v0/transport/replica.rs
index 276d8d5ba..e09c48e0d 100644
--- a/control-plane/stor-port/src/types/v0/transport/replica.rs
+++ b/control-plane/stor-port/src/types/v0/transport/replica.rs
@@ -869,15 +869,24 @@ pub struct AddNexusReplica {
     pub replica: ReplicaUri,
     /// Auto start rebuilding.
    pub auto_rebuild: bool,
+    /// Snapshots are present in the other replicas.
+    pub snapshots_present: bool,
 }
 
 impl AddNexusReplica {
     /// Return new `Self` from its properties.
-    pub fn new(node: &NodeId, nexus: &NexusId, replica: &ReplicaUri, auto_rebuild: bool) -> Self {
+    pub fn new(
+        node: &NodeId,
+        nexus: &NexusId,
+        replica: &ReplicaUri,
+        auto_rebuild: bool,
+        snapshots_present: bool,
+    ) -> Self {
         Self {
             node: node.clone(),
             nexus: nexus.clone(),
             replica: replica.clone(),
             auto_rebuild,
+            snapshots_present,
         }
     }
 }
diff --git a/utils/deployer-cluster/src/lib.rs b/utils/deployer-cluster/src/lib.rs
index 554fdc698..9cfac8847 100644
--- a/utils/deployer-cluster/src/lib.rs
+++ b/utils/deployer-cluster/src/lib.rs
@@ -275,7 +275,7 @@ impl Cluster {
         }
         Err(())
     }
-    /// Wait till the node is in the given status.
+    /// Wait till the pool is online.
    pub async fn wait_pool_online(&self, pool_id: PoolId) -> Result<(), ()> {
         let timeout = Duration::from_secs(2);
         let start = std::time::Instant::now();
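
The replica-scoped check mentioned in the commit message is left as a potential
improvement. Below is a minimal standalone sketch of the trade-off between the
two rules; all names (Fix, Node, Registry, io-engine-*) are illustrative
assumptions, not the control-plane's actual API:

use std::collections::{HashMap, HashSet};

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Fix {
    NexusRebuildReplicaAncestry,
}

/// A node as seen by the registry: just the set of fixes it reported.
struct Node {
    fixes: HashSet<Fix>,
}

struct Registry {
    nodes: HashMap<&'static str, Node>,
}

impl Registry {
    /// The simple rule this patch uses: every node known to the registry must
    /// have the fix, including nodes that are down but still in the pstor.
    fn all_nodes_have_fix(&self, fix: Fix) -> bool {
        self.nodes.values().all(|n| n.fixes.contains(&fix))
    }

    /// The suggested improvement: only the nodes hosting this volume's
    /// replicas need the fix, since only they take part in the rebuild.
    fn replica_nodes_have_fix(&self, replica_nodes: &[&str], fix: Fix) -> bool {
        replica_nodes
            .iter()
            .all(|id| self.nodes.get(id).is_some_and(|n| n.fixes.contains(&fix)))
    }
}

fn main() {
    let fixed = || Node {
        fixes: HashSet::from([Fix::NexusRebuildReplicaAncestry]),
    };
    let registry = Registry {
        nodes: HashMap::from([
            ("io-engine-1", fixed()),
            ("io-engine-2", fixed()),
            // Powered off before the upgrade, but never removed from the pstor.
            ("io-engine-3", Node { fixes: HashSet::new() }),
        ]),
    };

    // The all-nodes rule trips over the stale entry: the false positive the
    // commit message warns about.
    assert!(!registry.all_nodes_have_fix(Fix::NexusRebuildReplicaAncestry));
    // Scoping the check to the volume's replica nodes admits the rebuild.
    assert!(registry.replica_nodes_have_fix(
        &["io-engine-1", "io-engine-2"],
        Fix::NexusRebuildReplicaAncestry
    ));
}

Under the replica-scoped rule the stale-pstor problem disappears, at the cost
of resolving the replica topology before each check (e.g. via the
NexusSpec::replica_ids helper this patch adds).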