From c455b9ce0014c9ba6b71e2e3d855441003ec0bb8 Mon Sep 17 00:00:00 2001 From: Pierre Marsais Date: Sun, 31 Jul 2022 01:45:45 +0100 Subject: [PATCH] cp: Implement `--sparse` flag This begins to address #3362 At the moment, only the `--sparse=always` logic matches the requirement from the GNU cp info page, i.e. always make holes in the destination when possible. Sparse copy is done by copying the source to the destination block by block (blocks being of the destination's fs block size). If the block only holds NUL bytes, we don't write to the destination. About `--sparse=auto`: according to the GNU cp info page, the destination file will be made sparse if the source file is sparse as well. The next step is likely to use `lseek` with `SEEK_HOLE` to detect whether the source file has holes. Currently, this has the same behaviour as `--sparse=never`. This `SEEK_HOLE` logic can also be applied to `--sparse=always` to improve performance when copying sparse files. About `--sparse=never`: from my understanding, it is not guaranteed that Rust's `fs::copy` will always produce a file with no holes, as ["platform-specific behavior may change in the future"](https://doc.rust-lang.org/std/fs/fn.copy.html#platform-specific-behavior) About other platforms: - `macos`: The solution may be to use `fcntl` command `F_PUNCHHOLE`. - `windows`: I only see `FSCTL_SET_SPARSE`. This should pass the following GNU tests: - `tests/cp/sparse.sh` - `tests/cp/sparse-2.sh` - `tests/cp/sparse-extents.sh` - `tests/cp/sparse-extents-2.sh` `sparse-perf.sh` needs `--sparse=auto`, and in particular a way to skip holes in the source file. 
---
 src/uu/cp/src/cp.rs      | 58 +++++++++++++++++++++---
 tests/by-util/test_cp.rs | 96 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+), 7 deletions(-)

diff --git a/src/uu/cp/src/cp.rs b/src/uu/cp/src/cp.rs
index 1dd48df112c..aea0f55f537 100644
--- a/src/uu/cp/src/cp.rs
+++ b/src/uu/cp/src/cp.rs
@@ -1581,18 +1581,21 @@ fn copy_on_write_linux(
     source: &Path,
     dest: &Path,
     reflink_mode: ReflinkMode,
-    _sparse_mode: SparseMode,
+    sparse_mode: SparseMode,
     context: &str,
 ) -> CopyResult<()> {
-    let src_file = File::open(source).context(context)?;
+    use std::os::unix::prelude::{FileExt, MetadataExt};
+
+    let mut src_file = File::open(source).context(context)?;
     let dst_file = OpenOptions::new()
         .write(true)
-        .truncate(false)
+        .truncate(true)
         .create(true)
         .open(dest)
         .context(context)?;
-    match reflink_mode {
-        ReflinkMode::Always => unsafe {
+
+    match (reflink_mode, sparse_mode) {
+        (ReflinkMode::Always, SparseMode::Auto) => unsafe {
             let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());
 
             if result != 0 {
@@ -1607,7 +1610,48 @@ fn copy_on_write_linux(
                 Ok(())
             }
         },
-        ReflinkMode::Auto => unsafe {
+        (ReflinkMode::Always, SparseMode::Always) | (ReflinkMode::Always, SparseMode::Never) => {
+            Err("`--reflink=always` can be used only with --sparse=auto".into())
+        }
+        (_, SparseMode::Always) => {
+            // Give the destination its final length up front: every block
+            // that is never written below is left as a hole.
+            let size = src_file.metadata()?.size();
+            if let Err(err) = dst_file.set_len(size) {
+                return Err(format!(
+                    "failed to ftruncate {:?} to size {}: {}",
+                    dest,
+                    size,
+                    err
+                )
+                .into());
+            }
+
+            let blksize = dst_file.metadata()?.blksize();
+            let mut buf: Vec<u8> = vec![0; blksize.try_into().unwrap()];
+            let mut current_offset: u64 = 0;
+
+            while current_offset < size {
+                use std::io::Read;
+
+                let this_read = src_file.read(&mut buf)?;
+                if this_read == 0 {
+                    // The source is shorter than the size recorded above;
+                    // fail instead of looping forever.
+                    return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
+                }
+
+                // Inspect only the bytes just read: the tail of `buf` may
+                // still hold data left over from a previous, longer read.
+                let chunk = &buf[..this_read];
+                if chunk.iter().any(|&x| x != 0) {
+                    dst_file.write_all_at(chunk, current_offset)?;
+                }
+                current_offset += this_read as u64;
+            }
+            Ok(())
+        }
+        (ReflinkMode::Auto, SparseMode::Auto) | (ReflinkMode::Auto, SparseMode::Never) => unsafe {
             let result = libc::ioctl(dst_file.as_raw_fd(), FICLONE!(), src_file.as_raw_fd());
 
             if result != 0 {
@@ -1615,7 +1659,7 @@ fn copy_on_write_linux(
             }
             Ok(())
         },
-        ReflinkMode::Never => {
+        (ReflinkMode::Never, _) => {
             fs::copy(source, dest).context(context)?;
             Ok(())
         }
diff --git a/tests/by-util/test_cp.rs b/tests/by-util/test_cp.rs
index 321f38990ae..c6bc8728412 100644
--- a/tests/by-util/test_cp.rs
+++ b/tests/by-util/test_cp.rs
@@ -1388,6 +1388,102 @@ fn test_closes_file_descriptors() {
         .succeeds();
 }
 
+#[cfg(any(target_os = "linux", target_os = "android"))]
+#[test]
+fn test_cp_sparse_never_empty() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    const BUFSZE: usize = 4096 * 4;
+    let buf: [u8; BUFSZE] = [0; BUFSZE];
+
+    at.make_file("src_file1");
+    at.write_bytes("src_file1", &buf);
+
+    ucmd.args(&["--sparse=never", "src_file1", "dst_file_non_sparse"])
+        .succeeds();
+    assert_eq!(at.read_bytes("dst_file_non_sparse"), buf);
+    assert_eq!(
+        at.metadata("dst_file_non_sparse").blocks() * 512,
+        buf.len() as u64
+    );
+}
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+#[test]
+fn test_cp_sparse_always_empty() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    const BUFSZE: usize = 4096 * 4;
+    let buf: [u8; BUFSZE] = [0; BUFSZE];
+
+    at.make_file("src_file1");
+    at.write_bytes("src_file1", &buf);
+
+    ucmd.args(&["--sparse=always", "src_file1", "dst_file_sparse"])
+        .succeeds();
+
+    assert_eq!(at.read_bytes("dst_file_sparse"), buf);
+    assert_eq!(at.metadata("dst_file_sparse").blocks(), 0);
+}
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+#[test]
+fn test_cp_sparse_always_non_empty() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    const BUFSZE: usize = 4096 * 16 + 3;
+    let mut buf: [u8; BUFSZE] = [0; BUFSZE];
+    let blocks_to_touch = [buf.len() / 3, 2 * (buf.len() / 3)];
+
+    for i in blocks_to_touch {
+        buf[i] = b'x';
+    }
+
+    at.make_file("src_file1");
+    at.write_bytes("src_file1", &buf);
+
+    ucmd.args(&["--sparse=always", "src_file1", "dst_file_sparse"])
+        .succeeds();
+
+    let touched_block_count =
+        blocks_to_touch.len() as u64 * at.metadata("dst_file_sparse").blksize() / 512;
+
+    assert_eq!(at.read_bytes("dst_file_sparse"), buf);
+    assert_eq!(at.metadata("dst_file_sparse").blocks(), touched_block_count);
+}
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+#[test]
+fn test_cp_sparse_always_reflink_always() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.make_file("src_file1");
+
+    ucmd.args(&[
+        "--sparse=always",
+        "--reflink=always",
+        "src_file1",
+        "dst_file",
+    ])
+    .fails();
+}
+
+#[cfg(any(target_os = "linux", target_os = "android"))]
+#[test]
+fn test_cp_sparse_never_reflink_always() {
+    let (at, mut ucmd) = at_and_ucmd!();
+
+    at.make_file("src_file1");
+
+    ucmd.args(&[
+        "--sparse=never",
+        "--reflink=always",
+        "src_file1",
+        "dst_file",
+    ])
+    .fails();
+}
+
 #[test]
 fn test_copy_dir_symlink() {
     let (at, mut ucmd) = at_and_ucmd!();