From 9e69e4254a761e41b5495f5c9a842d52637af81b Mon Sep 17 00:00:00 2001
From: Eh2406
Date: Thu, 20 Dec 2018 14:19:22 -0500
Subject: [PATCH] Use Cargo's fingerprint data to trim to only installed Rust versions

---
 README.md           |  20 +++-
 src/main.rs         |  52 +++++++--
 src/rust_version.rs | 249 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 314 insertions(+), 7 deletions(-)
 create mode 100644 src/rust_version.rs

diff --git a/README.md b/README.md
index b2c7885..be01c60 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,24 @@ To clean all build files older than 30 days in the local cargo project run:
 cargo sweep -t 30
 ```
 
+To clean all build files not made by the currently installed (by rustup) nightly compiler:
+
+```
+cargo sweep --toolchains="nightly"
+```
+
+This can be useful if you checked that your library works on stable, but mostly develop on nightly.
+
+
+To clean all build files not made by any of the currently installed (by rustup) compilers:
+
+```
+cargo sweep -i
+```
+
+This can be useful if you just updated your compilers with a `rustup update`.
+
+
 To preview the results of a sweep run, which is recommended as a first step, add the `-d` flag, for instance:
 
 ```
@@ -38,7 +56,7 @@ cargo sweep -t 30
 
 To clean everything but the latest build you will need to run it in several steps.
 
-**DEPRICATED** This behavior can be too agressive, since cargo can skip reading files when building a near-identical build, see #2 and #11. Also, this will be replaced once `build-plan` is stabilized in cargo.
+**DEPRECATED** This behavior can be too aggressive, since cargo can skip reading files when building a near-identical build, see #2 and #11. Also, this will be replaced once `build-plan` is stabilized in cargo.
 
 ```
 cargo sweep -s
diff --git a/src/main.rs b/src/main.rs
index 6518f52..79a7208 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,6 +14,7 @@ extern crate serde_json;
 use clap::{App, Arg, ArgGroup, SubCommand};
 use failure::Error;
 use fern::colors::{Color, ColoredLevelConfig};
+use rust_version::remove_not_built_with;
 use std::{
     env,
     fs::remove_file,
@@ -22,6 +23,7 @@ use std::{
 };
 use walkdir::WalkDir;
 
+mod rust_version;
 mod stamp;
 mod util;
 use self::stamp::Timestamp;
@@ -98,14 +100,14 @@ fn find_cargo_projects(root: &Path) -> Vec<PathBuf> {
 /// keeping only files which have been accessed within the given duration.
 /// Dry specifies if files should actually be removed or not.
 /// Returns a list of the deleted file/dir paths.
-fn try_clean_path<'a>(
-    path: &'a Path,
+fn try_clean_path(
+    path: &Path,
     keep_duration: &Duration,
     dry_run: bool,
 ) -> Result<(u64), Error> {
     let mut total_disk_space = 0;
     let mut target_path = path.to_path_buf();
-    target_path.push("target/");
+    target_path.push("target");
     for entry in WalkDir::new(target_path.to_str().unwrap())
         .min_depth(1)
         .contents_first(true)
@@ -118,7 +120,7 @@ fn try_clean_path<'a>(
             total_disk_space += metadata.len();
             if !dry_run {
                 match remove_file(entry.path()) {
-                    Ok(_) => info!("Successfuly removed: {:?}", entry.path()),
+                    Ok(_) => info!("Successfully removed: {:?}", entry.path()),
                     Err(e) => warn!("Failed to remove: {:?} {}", entry.path(), e),
                 };
             } else {
@@ -130,6 +132,7 @@ fn try_clean_path<'a>(
     Ok(total_disk_space)
 }
 
+#[allow(clippy::cyclomatic_complexity)]
 fn main() {
     let matches = App::new("Cargo sweep")
         .version("0.1")
@@ -164,17 +167,30 @@ fn main() {
                 .long("file")
                 .help("Load timestamp file in the given path, cleaning everything older"),
         )
+        .arg(
+            Arg::with_name("installed")
+                .short("i")
+                .long("installed")
+                .help("Keep only artifacts made by toolchains currently installed by rustup")
+        )
+        .arg(
+            Arg::with_name("toolchains")
+                .long("toolchains")
+                .value_name("toolchains")
+                .help("Toolchains (currently installed by rustup) that should have their artifacts kept.")
+                .takes_value(true),
+        )
         .arg(
             Arg::with_name("time")
                 .short("t")
                 .long("time")
                 .value_name("days")
-                .help("Number of days to backwards to keep")
+                .help("Number of days backwards to keep. Defaults to 30 if no value is set.")
                 .takes_value(true),
         )
         .group(
             ArgGroup::with_name("timestamp")
-                .args(&["stamp", "file", "time"])
+                .args(&["stamp", "file", "time", "installed", "toolchains"])
                 .required(true),
         )
         .arg(
@@ -216,6 +232,30 @@ fn main() {
             Ok(_) => {}
             Err(e) => error!("Failed to write timestamp file: {}", e),
         }
+        return;
+    }
+
+    if matches.is_present("installed") || matches.is_present("toolchains") {
+        if matches.is_present("recursive") {
+            for project_path in find_cargo_projects(&path) {
+                match remove_not_built_with(&project_path, matches.value_of("toolchains"), dry_run) {
+                    Ok(cleaned_amount) if dry_run => {
+                        info!("Would clean: {}", format_bytes(cleaned_amount))
+                    }
+                    Ok(cleaned_amount) => info!("Cleaned {}", format_bytes(cleaned_amount)),
+                    Err(e) => error!("Failed to clean {:?}: {}", project_path, e),
+                };
+            }
+        } else {
+            match remove_not_built_with(&path, matches.value_of("toolchains"), dry_run) {
+                Ok(cleaned_amount) if dry_run => {
+                    info!("Would clean: {}", format_bytes(cleaned_amount))
+                }
+                Ok(cleaned_amount) => info!("Cleaned {}", format_bytes(cleaned_amount)),
+                Err(e) => error!("Failed to clean {:?}: {}", path, e),
+            };
+        }
+        return;
     }
 
     if matches.is_present("recursive") {
diff --git a/src/rust_version.rs b/src/rust_version.rs
new file mode 100644
index 0000000..fcdfeac
--- /dev/null
+++ b/src/rust_version.rs
@@ -0,0 +1,249 @@
+#![allow(deprecated)]
+use failure::{bail, Error};
+use serde_json::from_str;
+use std::collections::HashSet;
+use std::hash::{Hash, Hasher, SipHasher};
+use std::process::Command;
+use std::{
+    fs::{self, remove_dir_all, remove_file, File},
+    io::prelude::*,
+    path::Path,
+};
+use walkdir::{DirEntry, WalkDir};
+
+/// This has to match the way Cargo hashes a rustc version.
+/// As such it is copied from Cargo's code.
+fn hash_u64<H: Hash>(hashable: &H) -> u64 {
+    let mut hasher = SipHasher::new_with_keys(0, 0);
+    hashable.hash(&mut hasher);
+    hasher.finish()
+}
+
+/// This has to match the way Cargo stores a rustc version in a fingerprint file.
+#[derive(Deserialize, Debug)]
+struct Fingerprint {
+    rustc: u64,
+}
+
+/// The files and folders tracked by fingerprint have the form `({prefix}-)?{name}-{16 char hex hash}(.{extension})?`.
+/// This returns `Some({hex hash})` if the name is of that form and `None` otherwise.
+fn hash_from_path_name(filename: &str) -> Option<&str> {
+    // maybe just use regex
+    let name = filename.split('.').next().unwrap();
+    let hash = name.rsplit('-').next().unwrap();
+    if hash.len() == name.len() {
+        // we did not find a dash, it can't be a fingerprint matched file.
+        return None;
+    }
+    if !hash.chars().all(|x| x.is_digit(16)) {
+        // we found a non hex char, it can't be a fingerprint matched file.
+        return None;
+    }
+    if hash.len() != 16 {
+        // the hash part is the wrong length.
+        // It is not a fingerprint, just a project with an unfortunate name.
+        return None;
+    }
+    Some(hash)
+}
+
+impl Fingerprint {
+    /// Attempts to load the Fingerprint data for a given fingerprint directory.
+    fn load(fingerprint_dir: &Path) -> Result<Self, Error> {
+        for entry in fs::read_dir(fingerprint_dir)? {
+            let path = entry?.path();
+            if let Some(ext) = path.extension() {
+                if ext == "json" {
+                    let mut file = File::open(&path)?;
+                    let mut contents = String::new();
+                    file.read_to_string(&mut contents)?;
+                    if let Ok(fing) = from_str(&contents) {
+                        return Ok(fing);
+                    }
+                }
+            }
+        }
+        bail!("did not find a fingerprint file in {:?}", fingerprint_dir)
+    }
+}
+
+fn load_all_fingerprint(
+    fingerprint_dir: &Path,
+    installed_rustc: &HashSet<u64>,
+) -> Result<HashSet<String>, Error> {
+    assert_eq!(
+        fingerprint_dir
+            .file_name()
+            .expect("load takes the path to a .fingerprint directory"),
+        ".fingerprint"
+    );
+    let mut keep = HashSet::new();
+    for entry in fs::read_dir(fingerprint_dir)? {
+        let path = entry?.path();
+        if path.is_dir() {
+            let f = Fingerprint::load(&path).map(|f| installed_rustc.contains(&f.rustc));
+            // we default to keeping, as there are files that don't have the data we need.
+            if f.unwrap_or(true) {
+                let name = path.file_name().unwrap().to_string_lossy();
+                if let Some(hash) = hash_from_path_name(&name) {
+                    keep.insert(hash.to_string());
+                }
+            }
+        }
+    }
+    debug!("Hashes to keep: {:#?}", keep);
+    Ok(keep)
+}
+
+fn total_disk_space_dir(dir: &Path) -> u64 {
+    WalkDir::new(dir)
+        .into_iter()
+        .filter_map(|entry| entry.ok())
+        .filter_map(|entry| entry.metadata().ok())
+        .filter(|metadata| metadata.is_file())
+        .fold(0, |acc, m| acc + m.len())
+}
+
+fn remove_not_matching_in_a_dir(
+    dir: &Path,
+    keep: &HashSet<String>,
+    dry_run: bool,
+) -> Result<u64, Error> {
+    let mut total_disk_space = 0;
+    for entry in fs::read_dir(dir)? {
+        let entry = entry?;
+        let metadata = entry.metadata()?;
+        let path = entry.path();
+        let name = path
+            .file_name()
+            .expect("folders in a directory don't have a name!?")
+            .to_string_lossy();
+        if let Some(hash) = hash_from_path_name(&name) {
+            if !keep.contains(hash) {
+                if path.is_file() {
+                    total_disk_space += metadata.len();
+                    if !dry_run {
+                        match remove_file(&path) {
+                            Ok(_) => info!("Successfully removed: {:?}", &path),
+                            Err(e) => warn!("Failed to remove: {:?} {}", &path, e),
+                        };
+                    } else {
+                        info!("Would remove: {:?}", &path);
+                    }
+                } else if path.is_dir() {
+                    total_disk_space += total_disk_space_dir(&path);
+                    if !dry_run {
+                        match remove_dir_all(&path) {
+                            Ok(_) => info!("Successfully removed: {:?}", &path),
+                            Err(e) => warn!("Failed to remove: {:?} {}", &path, e),
+                        };
+                    } else {
+                        info!("Would remove: {:?}", &path);
+                    }
+                }
+            }
+        }
+    }
+    Ok(total_disk_space)
+}
+
+fn remove_not_built_with_in_a_profile(
+    dir: &Path,
+    hashed_rust_version_to_keep: &HashSet<u64>,
+    dry_run: bool,
+) -> Result<u64, Error> {
+    let mut total_disk_space = 0;
+    let keep = load_all_fingerprint(&dir.join(".fingerprint"), hashed_rust_version_to_keep)?;
+    total_disk_space += remove_not_matching_in_a_dir(&dir.join(".fingerprint"), &keep, dry_run)?;
+    total_disk_space += remove_not_matching_in_a_dir(&dir.join("build"), &keep, dry_run)?;
+    total_disk_space += remove_not_matching_in_a_dir(&dir.join("deps"), &keep, dry_run)?;
+    // examples is just final artifacts not tracked by fingerprint, so skip that one.
+    // incremental is not tracked by fingerprint, so skip that one.
+    total_disk_space += remove_not_matching_in_a_dir(&dir.join("native"), &keep, dry_run)?;
+    total_disk_space += remove_not_matching_in_a_dir(dir, &keep, dry_run)?;
+    Ok(total_disk_space)
+}
+
+fn lookup_all_fingerprint_dirs(dir: &Path) -> impl Iterator<Item = DirEntry> {
+    WalkDir::new(dir)
+        .min_depth(1)
+        .into_iter()
+        .filter_map(|entry| entry.ok())
+        .filter(|p| &p.file_name().to_string_lossy() == ".fingerprint")
+}
+
+fn lookup_from_names<'a>(iter: impl Iterator<Item = &'a str>) -> Result<HashSet<u64>, Error> {
+    iter.map(|x| {
+        let plus_name = "+".to_owned() + x;
+        let out = Command::new("rustc").args(&[&plus_name, "-vV"]).output()?;
+        if !out.status.success() {
+            bail!(String::from_utf8_lossy(&out.stdout).to_string());
+        }
+        Ok(hash_u64(&String::from_utf8_lossy(&out.stdout)))
+    })
+    .chain(
+        // Some fingerprints made to track the output of build scripts claim to have been built with a rustc that hashes to 0.
+        // This can be fixed in cargo, but for now this makes sure we don't clean those files.
+        Some(Ok(0)),
+    )
+    .collect()
+}
+
+fn rustup_toolchain_list() -> Result<Vec<String>, Error> {
+    let out = Command::new("rustup")
+        .args(&["toolchain", "list"])
+        .output()?;
+    if !out.status.success() {
+        bail!(String::from_utf8_lossy(&out.stdout).to_string());
+    }
+    Ok(String::from_utf8_lossy(&out.stdout)
+        .split('\n')
+        .filter_map(|x| x.split_whitespace().next())
+        .map(|x| x.trim().to_owned())
+        .collect::<Vec<String>>())
+}
+
+pub fn remove_not_built_with(
+    dir: &Path,
+    rust_version_to_keep: Option<&str>,
+    dry_run: bool,
+) -> Result<u64, Error> {
+    let mut total_disk_space = 0;
+    let hashed_rust_version_to_keep = if let Some(names) = rust_version_to_keep {
+        info!(
+            "Using specified installed toolchains: {:?}",
+            names.split(',').collect::<Vec<_>>()
+        );
+        lookup_from_names(names.split(','))?
+    } else {
+        let rustup_toolchain_list = rustup_toolchain_list()?;
+        info!(
+            "Using all installed toolchains: {:?}",
+            rustup_toolchain_list
+        );
+        lookup_from_names(rustup_toolchain_list.iter().map(|x| x.as_str()))?
+    };
+    for fing in lookup_all_fingerprint_dirs(&dir.join("target")) {
+        total_disk_space += remove_not_built_with_in_a_profile(
+            fing.into_path().parent().unwrap(),
+            &hashed_rust_version_to_keep,
+            dry_run,
+        )?;
+    }
+    Ok(total_disk_space)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_rustup_toolchain_list_runs() {
+        rustup_toolchain_list().unwrap();
+    }
+
+    #[test]
+    fn test_lookup_from_rustup_runs() {
+        lookup_from_names(rustup_toolchain_list().unwrap().iter().map(|x| x.as_str())).unwrap();
+    }
+}
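A minimal standalone sketch, not part of the patch above: an extra unit test that could sit in the `tests` module, exercising `hash_from_path_name` on made-up directory names. The sample names are hypothetical; only the `({prefix}-)?{name}-{16 char hex hash}(.{extension})?` naming convention described in the module's doc comment is assumed.

```rust
#[test]
fn test_hash_from_path_name_examples() {
    // a typical fingerprint entry: crate name plus a 16-char hex hash
    assert_eq!(
        hash_from_path_name("serde-90a3f9e21e7e6ced"),
        Some("90a3f9e21e7e6ced")
    );
    // any extension is split off before the hash is inspected
    assert_eq!(
        hash_from_path_name("libfoo-0123456789abcdef.rlib"),
        Some("0123456789abcdef")
    );
    // no dash, a hash of the wrong length, or non-hex characters all mean
    // "not tracked by a fingerprint", so such entries are left alone
    assert_eq!(hash_from_path_name("examples"), None);
    assert_eq!(hash_from_path_name("foo-123"), None);
    assert_eq!(hash_from_path_name("foo-ghijklmnopqrstuv"), None);
}
```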