Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

Move async remove to snapshot_utils.rs #29406

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 1 addition & 86 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ use {
snapshot_config::SnapshotConfig,
snapshot_hash::StartingSnapshotHashes,
snapshot_package::PendingSnapshotPackage,
snapshot_utils,
snapshot_utils::{self, move_and_async_delete_path},
},
solana_sdk::{
clock::Slot,
Expand Down Expand Up @@ -2002,91 +2002,6 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo
online_stake_percentage as u64
}

/// Delete directories/files asynchronously to avoid blocking on it.
/// Fist, in sync context, rename the original path to *_deleted,
/// then spawn a thread to delete the renamed path.
/// If the process is killed and the deleting process is not done,
/// the leftover path will be deleted in the next process life, so
/// there is no file space leaking.
pub fn move_and_async_delete_path(path: impl AsRef<Path> + Copy) {
let mut path_delete = PathBuf::new();
path_delete.push(path);
path_delete.set_file_name(format!(
"{}{}",
path_delete.file_name().unwrap().to_str().unwrap(),
"_to_be_deleted"
));

if path_delete.exists() {
std::fs::remove_dir_all(&path_delete).unwrap();
}

if !path.as_ref().exists() {
return;
}

if let Err(err) = std::fs::rename(path, &path_delete) {
warn!(
"Path renaming failed: {}. Falling back to rm_dir in sync mode",
err.to_string()
);
delete_contents_of_path(path);
return;
}

Builder::new()
.name("solDeletePath".to_string())
.spawn(move || {
std::fs::remove_dir_all(path_delete).unwrap();
})
.unwrap();
}

/// Delete the files and subdirectories in a directory.
/// This is useful if the process does not have permission
/// to delete the top level directory it might be able to
/// delete the contents of that directory.
fn delete_contents_of_path(path: impl AsRef<Path> + Copy) {
if let Ok(dir_entries) = std::fs::read_dir(path) {
for entry in dir_entries.flatten() {
let sub_path = entry.path();
let metadata = match entry.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!(
"Failed to get metadata for {}. Error: {}",
sub_path.display(),
err.to_string()
);
break;
}
};
if metadata.is_dir() {
if let Err(err) = std::fs::remove_dir_all(&sub_path) {
warn!(
"Failed to remove sub directory {}. Error: {}",
sub_path.display(),
err.to_string()
);
}
} else if metadata.is_file() {
if let Err(err) = std::fs::remove_file(&sub_path) {
warn!(
"Failed to remove file {}. Error: {}",
sub_path.display(),
err.to_string()
);
}
}
}
} else {
warn!(
"Failed to read the sub paths of {}",
path.as_ref().display()
);
}
}

fn cleanup_accounts_paths(config: &ValidatorConfig) {
for accounts_path in &config.account_paths {
move_and_async_delete_path(accounts_path);
Expand Down
9 changes: 3 additions & 6 deletions ledger-tool/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ use {
},
},
solana_cli_output::{CliAccount, CliAccountNewConfig, OutputFormat},
solana_core::{
system_monitor_service::{SystemMonitorService, SystemMonitorStatsReportConfig},
validator::move_and_async_delete_path,
},
solana_core::system_monitor_service::{SystemMonitorService, SystemMonitorStatsReportConfig},
solana_entry::entry::Entry,
solana_geyser_plugin_manager::geyser_plugin_service::GeyserPluginService,
solana_ledger::{
Expand Down Expand Up @@ -63,8 +60,8 @@ use {
snapshot_hash::StartingSnapshotHashes,
snapshot_minimizer::SnapshotMinimizer,
snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
SUPPORTED_ARCHIVE_COMPRESSION,
self, move_and_async_delete_path, ArchiveFormat, SnapshotVersion,
DEFAULT_ARCHIVE_COMPRESSION, SUPPORTED_ARCHIVE_COMPRESSION,
},
},
solana_sdk::{
Expand Down
85 changes: 85 additions & 0 deletions runtime/src/snapshot_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,91 @@ pub enum VerifySlotDeltasError {
BadSlotHistory,
}

/// Delete the files and subdirectories in a directory.
/// This is useful if the process does not have permission
/// to delete the top level directory it might be able to
/// delete the contents of that directory.
fn delete_contents_of_path(path: impl AsRef<Path> + Copy) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Why did we reverse the order of these functions? All the other pub/helper pairs in this file are in the order: pub first, helper(s) immediately below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I copied the whole block from validator.rs to this file. I guess there is no such order convention in that file. I can reverse it here. But I think this kind of convention does not matter, and is hard to maintain. When a function is changed from private to public, is there need to generate a big diff just to maintain this order?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the diff it looks like in core/src/validator.rs the order of functions is

  1. move_and_async_delete_path()
  2. delete_contents_of_path()

And in runtime/src/snapshot_utils.rs the order is now

  1. delete_contents_of_path()
  2. move_and_async_delete_path()

My interpretation of @apfitzge's question is "why change this order?"

I understand/agree that there is not a convention/requirement for function ordering, and agree that public to private should not generate a large diff. In this case, the diff for moving code between files will be large no matter what. It's a tiny bit easier to review the diff if the function ordering does not change though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, was just wondering why the order did get reversed. I tried checking the new vs old blocks by just ctrl-Fing the entire moved code block, which failed due to the fn re-ordering.

if let Ok(dir_entries) = std::fs::read_dir(path) {
for entry in dir_entries.flatten() {
let sub_path = entry.path();
let metadata = match entry.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!(
"Failed to get metadata for {}. Error: {}",
sub_path.display(),
err.to_string()
);
break;
}
};
if metadata.is_dir() {
if let Err(err) = std::fs::remove_dir_all(&sub_path) {
warn!(
"Failed to remove sub directory {}. Error: {}",
sub_path.display(),
err.to_string()
);
}
} else if metadata.is_file() {
if let Err(err) = std::fs::remove_file(&sub_path) {
warn!(
"Failed to remove file {}. Error: {}",
sub_path.display(),
err.to_string()
);
}
}
}
} else {
warn!(
"Failed to read the sub paths of {}",
path.as_ref().display()
);
}
}

/// Delete directories/files asynchronously to avoid blocking on it.
/// Fist, in sync context, rename the original path to *_deleted,
/// then spawn a thread to delete the renamed path.
/// If the process is killed and the deleting process is not done,
/// the leftover path will be deleted in the next process life, so
/// there is no file space leaking.
pub fn move_and_async_delete_path(path: impl AsRef<Path> + Copy) {
let mut path_delete = PathBuf::new();
path_delete.push(path);
path_delete.set_file_name(format!(
"{}{}",
path_delete.file_name().unwrap().to_str().unwrap(),
"_to_be_deleted"
));

if path_delete.exists() {
std::fs::remove_dir_all(&path_delete).unwrap();
}

if !path.as_ref().exists() {
return;
}

if let Err(err) = std::fs::rename(path, &path_delete) {
warn!(
"Path renaming failed: {}. Falling back to rm_dir in sync mode",
err.to_string()
);
delete_contents_of_path(path);
return;
}

Builder::new()
.name("solDeletePath".to_string())
.spawn(move || {
std::fs::remove_dir_all(path_delete).unwrap();
})
.unwrap();
}

/// If the validator halts in the middle of `archive_snapshot_package()`, the temporary staging
/// directory won't be cleaned up. Call this function to clean them up.
pub fn remove_tmp_snapshot_archives(snapshot_archives_dir: impl AsRef<Path>) {
Expand Down