perf: reflink directories at once on macOS (#995)
wolfv authored Dec 20, 2024
1 parent b947836 commit 4e5b274
Showing 4 changed files with 128 additions and 48 deletions.
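
The heart of the change is in crates/rattler/src/install/mod.rs: on macOS, when reflinks are allowed and the package is not noarch Python, whole package directories are cloned with a single reflink call instead of being linked file by file. Below is a minimal, simplified sketch of that pattern using the reflink_copy crate (the same crate the commit calls); the helper name and the fallback branch are illustrative — the commit itself returns an error rather than falling back when the clone fails for any reason other than the destination already existing.

```rust
use std::io::ErrorKind;
use std::path::Path;

/// Try to clone an entire package directory in one operation (clonefile on APFS).
/// Returns true if the whole tree was cloned and per-file linking can be skipped.
fn clone_or_create_dir(source_dir: &Path, target_dir: &Path) -> std::io::Result<bool> {
    match reflink_copy::reflink(source_dir, target_dir) {
        // Whole directory cloned; the files underneath need no further linking.
        Ok(()) => Ok(true),
        // Destination already exists (e.g. created for another package); keep it.
        Err(e) if e.kind() == ErrorKind::AlreadyExists => Ok(false),
        // Illustrative fallback: create the directory and let per-file linking run.
        Err(_) => {
            std::fs::create_dir_all(target_dir)?;
            Ok(false)
        }
    }
}
```

Files inside a cloned directory that still need prefix replacement or clobber renames are pulled back out afterwards and handed to the regular per-file linking pass, as the mod.rs hunk below shows.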
78 changes: 41 additions & 37 deletions crates/rattler/src/install/link.rs
@@ -1,5 +1,6 @@
//! This module contains the logic to link a given file from the package cache into the target directory.
//! See [`link_file`] for more information.
use fs_err as fs;
use memmap2::Mmap;
use once_cell::sync::Lazy;
use rattler_conda_types::package::{FileMode, PathType, PathsEntry, PrefixPlaceholder};
@@ -12,7 +13,7 @@ use std::borrow::Cow;
use std::fmt;
use std::fmt::Formatter;
use std::fs::Permissions;
use std::io::{ErrorKind, Read, Seek, Write};
use std::io::{BufWriter, ErrorKind, Read, Seek, Write};
use std::path::{Path, PathBuf};

use super::apple_codesign::{codesign, AppleCodeSignBehavior};
@@ -165,8 +166,11 @@ pub fn link_file(
let source = map_or_read_source_file(&source_path)?;

// Open the destination file
let destination = std::fs::File::create(&destination_path)
.map_err(LinkFileError::FailedToOpenDestinationFile)?;
let destination = BufWriter::with_capacity(
50 * 1024,
fs::File::create(&destination_path)
.map_err(LinkFileError::FailedToOpenDestinationFile)?,
);
let mut destination_writer = HashingWriter::<_, rattler_digest::Sha256>::new(destination);

// Convert back-slashes (\) on windows to forward-slashes (/) to avoid problems with
@@ -213,9 +217,9 @@

// Copy over filesystem permissions. We do this to ensure that the destination file has the
// same permissions as the source file.
let metadata = std::fs::symlink_metadata(&source_path)
let metadata = fs::symlink_metadata(&source_path)
.map_err(LinkFileError::FailedToReadSourceFileMetadata)?;
std::fs::set_permissions(&destination_path, metadata.permissions())
fs::set_permissions(&destination_path, metadata.permissions())
.map_err(LinkFileError::FailedToUpdateDestinationFilePermissions)?;

// (re)sign the binary if the file is executable
@@ -247,7 +251,7 @@ pub fn link_file(
.map_err(LinkFileError::FailedToComputeSha)?,
);
file_size = Some(
std::fs::symlink_metadata(&destination_path)
fs::symlink_metadata(&destination_path)
.map_err(LinkFileError::FailedToOpenDestinationFile)?
.len(),
);
@@ -297,7 +301,7 @@ pub fn link_file(
} else if let Some(size_in_bytes) = path_json_entry.size_in_bytes {
size_in_bytes
} else {
let metadata = std::fs::symlink_metadata(&destination_path)
let metadata = fs::symlink_metadata(&destination_path)
.map_err(LinkFileError::FailedToOpenDestinationFile)?;
metadata.len()
};
@@ -342,8 +346,7 @@ impl AsRef<[u8]> for MmapOrBytes {
/// <https://github.com/prefix-dev/pixi/issues/234>
#[allow(clippy::verbose_file_reads)]
fn map_or_read_source_file(source_path: &Path) -> Result<MmapOrBytes, LinkFileError> {
let mut file =
std::fs::File::open(source_path).map_err(LinkFileError::FailedToOpenSourceFile)?;
let mut file = fs::File::open(source_path).map_err(LinkFileError::FailedToOpenSourceFile)?;

// Try to memory map the file
let mmap = unsafe { Mmap::map(&file) };
@@ -379,15 +382,15 @@ fn reflink_to_destination(
{
// Copy over filesystem permissions. We do this to ensure that the destination file has the
// same permissions as the source file.
let metadata = std::fs::metadata(source_path)
let metadata = fs::metadata(source_path)
.map_err(LinkFileError::FailedToReadSourceFileMetadata)?;
std::fs::set_permissions(destination_path, metadata.permissions())
fs::set_permissions(destination_path, metadata.permissions())
.map_err(LinkFileError::FailedToUpdateDestinationFilePermissions)?;
}
return Ok(LinkMethod::Reflink);
}
Err(e) if e.kind() == ErrorKind::AlreadyExists => {
std::fs::remove_file(destination_path).map_err(|err| {
fs::remove_file(destination_path).map_err(|err| {
LinkFileError::IoError(String::from("removing clobbered file"), err)
})?;
}
@@ -415,10 +418,10 @@ fn hardlink_to_destination(
destination_path: &Path,
) -> Result<LinkMethod, LinkFileError> {
loop {
match std::fs::hard_link(source_path, destination_path) {
match fs::hard_link(source_path, destination_path) {
Ok(_) => return Ok(LinkMethod::Hardlink),
Err(e) if e.kind() == ErrorKind::AlreadyExists => {
std::fs::remove_file(destination_path).map_err(|err| {
fs::remove_file(destination_path).map_err(|err| {
LinkFileError::IoError(String::from("removing clobbered file"), err)
})?;
}
@@ -446,7 +449,7 @@ fn symlink_to_destination(
match symlink(&linked_path, destination_path) {
Ok(_) => return Ok(LinkMethod::Softlink),
Err(e) if e.kind() == ErrorKind::AlreadyExists => {
std::fs::remove_file(destination_path).map_err(|err| {
fs::remove_file(destination_path).map_err(|err| {
LinkFileError::IoError(String::from("removing clobbered file"), err)
})?;
}
@@ -468,10 +471,10 @@ fn copy_to_destination(
destination_path: &Path,
) -> Result<LinkMethod, LinkFileError> {
loop {
match std::fs::copy(source_path, destination_path) {
match fs::copy(source_path, destination_path) {
Err(e) if e.kind() == ErrorKind::AlreadyExists => {
// If the file already exists, remove it and try again.
std::fs::remove_file(destination_path).map_err(|err| {
fs::remove_file(destination_path).map_err(|err| {
LinkFileError::IoError(String::from("removing clobbered file"), err)
})?;
}
@@ -653,21 +656,20 @@ pub fn copy_and_replace_textual_placeholder(
source_bytes = rest;
}

loop {
if let Some(index) = memchr::memmem::find(source_bytes, old_prefix) {
// Write all bytes up to the old prefix, followed by the new prefix.
destination.write_all(&source_bytes[..index])?;
destination.write_all(new_prefix)?;

// Skip past the old prefix in the source bytes
source_bytes = &source_bytes[index + old_prefix.len()..];
} else {
// The old prefix was not found in the (remaining) source bytes.
// Write the rest of the bytes
destination.write_all(source_bytes)?;
return Ok(());
}
let mut last_match = 0;

for index in memchr::memmem::find_iter(source_bytes, old_prefix) {
destination.write_all(&source_bytes[last_match..index])?;
destination.write_all(new_prefix)?;
last_match = index + old_prefix.len();
}

// Write remaining bytes
if last_match < source_bytes.len() {
destination.write_all(&source_bytes[last_match..])?;
}

Ok(())
}

/// Given the contents of a file, copies it to the `destination` and in the process replace any
@@ -688,8 +690,10 @@ pub fn copy_and_replace_cstring_placeholder(
let old_prefix = prefix_placeholder.as_bytes();
let new_prefix = target_prefix.as_bytes();

let finder = memchr::memmem::Finder::new(old_prefix);

loop {
if let Some(index) = memchr::memmem::find(source_bytes, old_prefix) {
if let Some(index) = finder.find(source_bytes) {
// write all bytes up to the old prefix, followed by the new prefix.
destination.write_all(&source_bytes[..index])?;

@@ -704,7 +708,7 @@ pub fn copy_and_replace_cstring_placeholder(
let old_len = old_bytes.len();

// replace all occurrences of the old prefix with the new prefix
while let Some(index) = memchr::memmem::find(old_bytes, old_prefix) {
while let Some(index) = finder.find(old_bytes) {
out.write_all(&old_bytes[..index])?;
out.write_all(new_prefix)?;
old_bytes = &old_bytes[index + old_prefix.len()..];
@@ -738,9 +742,9 @@ pub fn copy_and_replace_cstring_placeholder(

fn symlink(source_path: &Path, destination_path: &Path) -> std::io::Result<()> {
#[cfg(windows)]
return std::os::windows::fs::symlink_file(source_path, destination_path);
return fs_err::os::windows::fs::symlink_file(source_path, destination_path);
#[cfg(unix)]
return std::os::unix::fs::symlink(source_path, destination_path);
return fs_err::os::unix::fs::symlink(source_path, destination_path);
}

#[allow(unused_variables)]
@@ -753,12 +757,12 @@ fn has_executable_permissions(permissions: &Permissions) -> bool {

#[cfg(test)]
mod test {
use super::PYTHON_REGEX;
use fs_err as fs;
use rattler_conda_types::Platform;
use rstest::rstest;
use std::io::Cursor;

use super::PYTHON_REGEX;

#[rstest]
#[case("Hello, cruel world!", "cruel", "fabulous", "Hello, fabulous world!")]
#[case(
@@ -910,7 +914,7 @@ mod test {
for _ in 0..15 {
target_prefix.push_str("verylongstring/");
}
let input = std::fs::read(test_file).unwrap();
let input = fs::read(test_file).unwrap();
let mut output = Cursor::new(Vec::new());
super::copy_and_replace_textual_placeholder(
&input,
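
An aside on the copy_and_replace_textual_placeholder rewrite above: instead of repeatedly re-slicing the input after each memmem::find hit, the new code walks every match once with memmem::find_iter and remembers where the previous match ended. The same pattern in isolation (the function name and signature here are illustrative, not part of the crate):

```rust
use std::io::Write;

/// Copy `source` to `destination`, replacing every occurrence of `old` with `new`.
fn replace_all_into<W: Write>(
    source: &[u8],
    old: &[u8],
    new: &[u8],
    destination: &mut W,
) -> std::io::Result<()> {
    let mut last_match = 0;
    for index in memchr::memmem::find_iter(source, old) {
        // Bytes between the previous match and this one, then the replacement.
        destination.write_all(&source[last_match..index])?;
        destination.write_all(new)?;
        last_match = index + old.len();
    }
    // Whatever follows the final match.
    destination.write_all(&source[last_match..])?;
    Ok(())
}
```

Writing the tail unconditionally is equivalent to the guarded write in the commit, since writing an empty slice is a no-op.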
87 changes: 81 additions & 6 deletions crates/rattler/src/install/mod.rs
@@ -678,12 +678,84 @@ pub fn link_package_sync(
.push((entry, computed_path));
}

for directory in directories_to_construct.into_iter().sorted() {
let full_path = target_dir.join(directory);
match fs::create_dir(&full_path) {
Ok(_) => (),
Err(e) if e.kind() == ErrorKind::AlreadyExists => (),
Err(e) => return Err(InstallError::FailedToCreateDirectory(full_path, e)),
let mut created_directories = HashSet::new();
let mut reflinked_files = HashMap::new();
for directory in directories_to_construct
.into_iter()
.sorted_by(|a, b| a.components().count().cmp(&b.components().count()))
{
let full_path = target_dir.join(&directory);

// if we already (recursively) created the parent directory we can skip this
if created_directories
.iter()
.any(|dir| directory.starts_with(dir))
{
continue;
}

// can we lock this directory?
if full_path.exists() {
// tracing::info!("directory already exists: {:?}", directory);
continue;
} else if allow_ref_links && cfg!(target_os = "macos") && !index_json.noarch.is_python() {
// reflink the whole directory if possible
// currently this does not handle noarch packages
match reflink_copy::reflink(package_dir.join(&directory), &full_path) {
Ok(_) => {
created_directories.insert(directory.clone());
// remove paths that we just reflinked (everything that starts with the directory)
let (matching, non_matching): (HashMap<_, _>, HashMap<_, _>) =
paths_by_directory
.drain()
.partition(|(k, _)| k.starts_with(&directory));

// Store matching paths in reflinked_files
reflinked_files.extend(matching);
// Keep non-matching paths in paths_by_directory
paths_by_directory = non_matching;
}
Err(e) if e.kind() == ErrorKind::AlreadyExists => (),
Err(e) => return Err(InstallError::FailedToCreateDirectory(full_path, e)),
}
} else {
match fs::create_dir(&full_path) {
Ok(_) => (),
Err(e) if e.kind() == ErrorKind::AlreadyExists => (),
Err(e) => return Err(InstallError::FailedToCreateDirectory(full_path, e)),
}
}
}

// Take care of all the reflinked files (macos only)
// - Add them to the paths.json
// - Fix any occurrences of the prefix in the files
// - Rename files that need clobber-renames
let mut reflinked_paths_entries = Vec::new();
for (parent_dir, files) in reflinked_files {
// files that are either in the clobber map or contain a placeholder,
// we defer to the regular linking that comes after this block
// and re-add them to the paths_by_directory map
for file in files {
if clobber_paths.contains_key(&file.1) || file.0.prefix_placeholder.is_some() {
paths_by_directory
.entry(parent_dir.clone())
.or_insert_with(Vec::new)
.push(file);
} else {
reflinked_paths_entries.push(PathsEntry {
relative_path: file.0.relative_path,
path_type: file.0.path_type.into(),
no_link: file.0.no_link,
sha256: file.0.sha256,
size_in_bytes: file.0.size_in_bytes,
// No placeholder, no clobbering, so these are none for sure
original_path: None,
sha256_in_prefix: None,
file_mode: None,
prefix_placeholder: None,
})
}
}
}

@@ -704,6 +776,7 @@ pub fn link_package_sync(
let mut path_entries = Vec::with_capacity(entries_in_subdir.len());
for (entry, computed_path) in entries_in_subdir {
let clobber_rename = clobber_paths.get(&entry.relative_path).cloned();

let link_result = link_file(
&entry,
computed_path.clone(),
@@ -755,6 +828,8 @@
})
.collect::<Result<Vec<_>, _>>()?;

paths.extend(reflinked_paths_entries);

// If this package is a noarch python package we also have to create entry
// points.
//
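
One detail worth noting from the hunk above: once a directory has been reflinked wholesale, its pending file entries are split out of paths_by_directory in a single pass with drain().partition(), so they are not linked a second time. A standalone sketch of that split, with the value type simplified for illustration:

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};

/// Remove and return every entry whose directory lies under `reflinked_dir`,
/// leaving all other entries in `paths_by_directory`.
fn split_reflinked(
    paths_by_directory: &mut HashMap<PathBuf, Vec<String>>,
    reflinked_dir: &Path,
) -> HashMap<PathBuf, Vec<String>> {
    // Partition into (entries under the reflinked directory, everything else).
    let (matching, non_matching): (HashMap<_, _>, HashMap<_, _>) = paths_by_directory
        .drain()
        .partition(|(dir, _)| dir.starts_with(reflinked_dir));
    *paths_by_directory = non_matching;
    matching
}
```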
2 changes: 1 addition & 1 deletion crates/rattler_cache/src/package_cache/mod.rs
@@ -365,7 +365,7 @@ where

// Validate the package directory.
let validation_result = tokio::task::spawn_blocking(move || {
validate_package_directory(&path_inner, ValidationMode::Full)
validate_package_directory(&path_inner, ValidationMode::Fast)
})
.await;

9 changes: 5 additions & 4 deletions crates/rattler_conda_types/src/prefix_record.rs
@@ -214,7 +214,7 @@ impl PrefixRecord {

/// Parses a `paths.json` file from a file.
pub fn from_path(path: impl AsRef<Path>) -> Result<Self, std::io::Error> {
Self::from_reader(File::open(path.as_ref())?)
Self::from_str(&fs_err::read_to_string(path.as_ref())?)
}

/// Return the canonical file name for a `PrefixRecord`. Takes the form of
@@ -234,7 +234,8 @@
path: impl AsRef<Path>,
pretty: bool,
) -> Result<(), std::io::Error> {
self.write_to(File::create(path.as_ref())?, pretty)
let file = File::create(path.as_ref())?;
self.write_to(BufWriter::with_capacity(50 * 1024, file), pretty)
}

/// Writes the contents of this instance to the file at the specified location.
@@ -244,9 +245,9 @@
pretty: bool,
) -> Result<(), std::io::Error> {
if pretty {
serde_json::to_writer_pretty(BufWriter::new(writer), self)?;
serde_json::to_writer_pretty(writer, self)?;
} else {
serde_json::to_writer(BufWriter::new(writer), self)?;
serde_json::to_writer(writer, self)?;
}
Ok(())
}
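
The prefix_record.rs change above moves buffering to the call site: the file handle is wrapped in a 50 KiB BufWriter before being handed to serde_json, rather than write_to wrapping the writer itself. A minimal sketch of the same pattern (the Record type and path handling here are illustrative):

```rust
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

use serde::Serialize;

#[derive(Serialize)]
struct Record {
    name: String,
    size_in_bytes: u64,
}

fn write_pretty_json(record: &Record, path: &Path) -> std::io::Result<()> {
    // Batch the many small writes serde_json makes into 50 KiB chunks.
    let mut writer = BufWriter::with_capacity(50 * 1024, File::create(path)?);
    serde_json::to_writer_pretty(&mut writer, record)?;
    // Flush explicitly so buffered bytes are not silently lost on drop.
    writer.flush()?;
    Ok(())
}
```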
