diff --git a/mayastor/src/bdev/nexus/mod.rs b/mayastor/src/bdev/nexus/mod.rs
index c21912963..9727c6139 100644
--- a/mayastor/src/bdev/nexus/mod.rs
+++ b/mayastor/src/bdev/nexus/mod.rs
@@ -20,6 +20,7 @@ pub mod nexus_module;
 pub mod nexus_nbd;
 pub mod nexus_rpc;
 pub mod nexus_share;
+pub mod nexus_simple_rebuild;
 
 /// public function which simply calls register module
 pub fn register_module() {
diff --git a/mayastor/src/bdev/nexus/nexus_bdev_children.rs b/mayastor/src/bdev/nexus/nexus_bdev_children.rs
index 798542e0f..4bd894f6d 100644
--- a/mayastor/src/bdev/nexus/nexus_bdev_children.rs
+++ b/mayastor/src/bdev/nexus/nexus_bdev_children.rs
@@ -138,10 +138,17 @@ impl Nexus {
                 // mark faulted so that it can never take part in the IO path of
                 // the nexus until brought online.
                 child.state = ChildState::Faulted;
+
+                // how to "tag" a child as out of sync?
+                child.repairing = true;
                 self.children.push(child);
                 self.child_count += 1;
-                // TODO -- rsync labels
-                Ok(self.set_state(NexusState::Degraded))
+                self.set_state(NexusState::Degraded);
+
+                // what to do if this fails?
+                let _ = self.sync_labels().await;
+
+                Ok(self.simple_rebuild().await)
             }
             Err(e) => {
                 if let Err(err) = bdev_destroy(uri).await {
diff --git a/mayastor/src/bdev/nexus/nexus_simple_rebuild.rs b/mayastor/src/bdev/nexus/nexus_simple_rebuild.rs
new file mode 100644
index 000000000..59ed60389
--- /dev/null
+++ b/mayastor/src/bdev/nexus/nexus_simple_rebuild.rs
@@ -0,0 +1,100 @@
+//!
+//! This file implements operations for the rebuild process.
+//!
+//! `simple_rebuild` does a very simple sequential rebuild: it copies one block
+//! at a time from the first child that is not being repaired to the first child
+//! that is.
+//!
+
+use crate::bdev::nexus::{
+    nexus_bdev::{Nexus, NexusState},
+    nexus_channel::DREvent,
+    nexus_child::ChildState,
+};
+
+use std::cmp;
+
+impl Nexus {
+    /// Rebuilds only the first bad child it finds.
+    ///
+    /// The first child that is not marked as `repairing` is used as the source
+    /// and the first child marked as `repairing` is the destination. At most
+    /// 10MiB worth of blocks are copied, one block at a time.
+    ///
+    /// On success the nexus is set to `NexusState::Online`; on any failure the
+    /// state it had on entry is restored. The value returned is whatever
+    /// `set_state` returns for that transition.
+    pub(crate) async fn simple_rebuild(&mut self) -> NexusState {
+        // if the rebuild fails we go back to the previous state
+        let state = self.state;
+
+        self.set_state(NexusState::Remuling);
+
+        let good_child = match self.children.iter().find(|c| !c.repairing) {
+            Some(good_child) => good_child,
+            None => return self.set_state(state),
+        };
+
+        let bad_child = match self.children.iter().find(|c| c.repairing) {
+            Some(bad_child) => bad_child,
+            None => return self.set_state(state),
+        };
+
+        let bdev_handle = match &bad_child.bdev_handle {
+            Some(bdev_handle) => bdev_handle,
+            None => return self.set_state(state),
+        };
+
+        let block_size = self.bdev.block_len() as u64;
+        let max_rebuild_size = 10u64 * 1024 * 1024; // 10MiB
+        let max_blocks = max_rebuild_size / block_size;
+
+        // At the moment if we copy the whole bdev it causes the js tests to
+        // time out. One option is to invoke the rebuild through a different
+        // command (not on the add_child).
+        // TODO: fix this
+        let block_count = cmp::min(self.bdev.num_blocks(), max_blocks);
+
+        info!(
+            "Rebuilding child {} from {}, blocks: {}, blockSize: {}",
+            bad_child.name, good_child.name, block_count, block_size
+        );
+
+        let mut buf = match bdev_handle.dma_malloc(block_size as usize) {
+            Ok(buf) => buf,
+            Err(_) => return self.set_state(state),
+        };
+
+        for blk in 0..block_count {
+            // shift by `data_ent_offset` blocks, then convert to a byte address
+            let addr: u64 = (blk + self.data_ent_offset) * block_size;
+            if good_child.read_at(addr, &mut buf).await.is_err() {
+                return self.set_state(state);
+            }
+
+            if bad_child.write_at(addr, &buf).await.is_err() {
+                return self.set_state(state);
+            }
+        }
+
+        // Here we drop the immutable refs in favour of a mutable ref so we can
+        // mutate the child. Not ideal but it works; alternatively we'd have to
+        // use something like "interior mutability" or an AtomicBool.
+        let bad_name = bad_child.name.clone();
+        match self.children.iter_mut().find(|c| c.name == bad_name) {
+            Some(bad_child) => {
+                bad_child.repairing = false;
+                bad_child.state = ChildState::Open;
+            }
+            // restore the previous state rather than leaving the nexus stuck
+            // in the transient rebuild state
+            None => return self.set_state(state),
+        }
+
+        info!("Rebuild of child {} is complete!", bad_name);
+
+        // child can now be part of the IO path
+        self.reconfigure(DREvent::ChildOnline).await;
+
+        self.set_state(NexusState::Online)
+    }
+}
diff --git a/mayastor/tests/nexus_simple_rebuild.rs b/mayastor/tests/nexus_simple_rebuild.rs
new file mode 100644
index 000000000..056347684
--- /dev/null
+++ b/mayastor/tests/nexus_simple_rebuild.rs
@@ -0,0 +1,123 @@
+use crossbeam::channel::unbounded;
+pub mod common;
+
+use mayastor::{
+    bdev::{nexus_create, nexus_lookup},
+    core::{
+        mayastor_env_stop,
+        MayastorCliArgs,
+        MayastorEnvironment,
+        Reactor,
+    },
+};
+
+static DISKNAME1: &str = "/tmp/disk1.img";
+static BDEVNAME1: &str = "aio:///tmp/disk1.img?blk_size=512";
+
+static DISKNAME2: &str = "/tmp/disk2.img";
+static BDEVNAME2: &str = "aio:///tmp/disk2.img?blk_size=512";
+
+static NEXUS_NAME: &str = "rebuild_test";
+static NEXUS_SIZE: u64 = 10 * 1024 * 1024; // 10MiB
+
+#[test]
+fn rebuild_test() {
+    common::truncate_file(DISKNAME1, NEXUS_SIZE / 1024);
+    common::truncate_file(DISKNAME2, NEXUS_SIZE / 1024);
+
+    test_init!();
+
+    Reactor::block_on(rebuild_test_start());
+
+    common::delete_file(&[DISKNAME1.into(), DISKNAME2.into()]);
+}
+
+/// Fills the nexus with random data, checks which disk holds that data, adds
+/// the second child (which triggers the rebuild) and compares both disks.
+async fn rebuild_test_start() {
+    create_nexus().await;
+
+    let nexus = nexus_lookup(NEXUS_NAME).unwrap();
+    let device = nexus.share(None).await.unwrap();
+
+    // fill the shared nexus device with random data
+    let nexus_device = device.clone();
+    let (s, r) = unbounded::<String>();
+    std::thread::spawn(move || s.send(dd_urandom(&nexus_device)));
+    reactor_poll!(r);
+
+    // the only child so far is expected to match the nexus device
+    let nexus_device = device.clone();
+    let (s, r) = unbounded::<String>();
+    std::thread::spawn(move || s.send(compare_nexus_device(&nexus_device, DISKNAME1, true)));
+    reactor_poll!(r);
+
+    // the second disk is not a child yet, so it is expected to differ
+    let nexus_device = device.clone();
+    let (s, r) = unbounded::<String>();
+    std::thread::spawn(move || s.send(compare_nexus_device(&nexus_device, DISKNAME2, false)));
+    reactor_poll!(r);
+
+    // add the second child -> atm it's where we rebuild as well
+    nexus.add_child(BDEVNAME2).await.unwrap();
+
+    // after the rebuild both disks are expected to hold the same data
+    let (s, r) = unbounded::<String>();
+    std::thread::spawn(move || s.send(compare_devices(DISKNAME1, DISKNAME2, true)));
+    reactor_poll!(r);
+
+    mayastor_env_stop(0);
+}
+
+/// Creates the test nexus with the first disk as its only child.
+async fn create_nexus() {
+    let ch = vec![BDEVNAME1.to_string()];
+    nexus_create(NEXUS_NAME, NEXUS_SIZE, None, &ch)
+        .await
+        .unwrap();
+}
+
+/// Overwrites the whole of `device` with random data and returns dd's stdout.
+pub fn dd_urandom(device: &str) -> String {
+    let (_, stdout, _stderr) = run_script::run(
+        r#"
+        dd if=/dev/urandom of=$1 conv=fsync,nocreat,notrunc iflag=count_bytes count=`blockdev --getsize64 $1`
+    "#,
+        &vec![device.into()],
+        &run_script::ScriptOptions::new(),
+    )
+    .unwrap();
+    stdout
+}
+
+/// Compares the nexus device against `device`, skipping the first 5M of
+/// `device`, and asserts that the outcome matches `expected_pass`.
+pub fn compare_nexus_device(nexus_device: &str, device: &str, expected_pass: bool) -> String {
+    let (exit, stdout, _stderr) = run_script::run(
+        r#"
+        cmp -n `blockdev --getsize64 $1` $1 $2 0 5M
+        test $? -eq $3
+    "#,
+        &vec![nexus_device.into(), device.into(), (!expected_pass as i32).to_string()],
+        &run_script::ScriptOptions::new(),
+    )
+    .unwrap();
+    assert_eq!(exit, 0);
+    stdout
+}
+
+/// Compares two devices, skipping the first 5M of each, and asserts that the
+/// outcome matches `expected_pass`.
+pub fn compare_devices(first_device: &str, second_device: &str, expected_pass: bool) -> String {
+    let (exit, stdout, _stderr) = run_script::run(
+        r#"
+        cmp -b $1 $2 5M 5M
+        test $? -eq $3
+    "#,
+        &vec![first_device.into(), second_device.into(), (!expected_pass as i32).to_string()],
+        &run_script::ScriptOptions::new(),
+    )
+    .unwrap();
+    assert_eq!(exit, 0);
+    stdout
+}