Skip to content

Commit

Permalink
Use Cargos fingerprint data to trim to only install rust versions
Browse files Browse the repository at this point in the history
  • Loading branch information
Eh2406 committed Dec 20, 2018
1 parent 782f192 commit 9e69e42
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 7 deletions.
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ To clean all build files older than 30 days in the local cargo project run:
cargo sweep -t 30
```

To clean all build files not made by the currently installed (by rustup) nightly compiler:

```
cargo sweep --toolchains="nightly"
```

This can be useful if you checked that your library works on stable, but mostly develop on nightly.


To clean all build files not made by any of the currently installed (by rustup) compilers:

```
cargo sweep -i
```

This can be useful if you just updated your compilers with a `rustup update`.


To preview the results of a sweep run, which is recommended as a first step, add the `-d` flag, for instance:

```
Expand All @@ -38,7 +56,7 @@ cargo sweep -t 30 <path>

To clean everything but the latest build you will need to run it in several steps.

**DEPRICATED** This behavior can be too agressive, since cargo can skip reading files when building a near-identical build, see #2 and #11. Also, this will be replaced once `build-plan` is stabilized in cargo.
**DEPRECATED** This behavior can be too aggressive, since cargo can skip reading files when building a near-identical build, see #2 and #11. Also, this will be replaced once `build-plan` is stabilized in cargo.

```
cargo sweep -s
Expand Down
52 changes: 46 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ extern crate serde_json;
use clap::{App, Arg, ArgGroup, SubCommand};
use failure::Error;
use fern::colors::{Color, ColoredLevelConfig};
use rust_version::remove_not_built_with;
use std::{
env,
fs::remove_file,
Expand All @@ -22,6 +23,7 @@ use std::{
};
use walkdir::WalkDir;

mod rust_version;
mod stamp;
mod util;
use self::stamp::Timestamp;
Expand Down Expand Up @@ -98,14 +100,14 @@ fn find_cargo_projects(root: &Path) -> Vec<PathBuf> {
/// keeping only files which have been accessed within the given duration.
/// Dry specifies if files should actually be removed or not.
/// Returns a list of the deleted file/dir paths.
fn try_clean_path<'a>(
path: &'a Path,
fn try_clean_path(
path: &Path,
keep_duration: &Duration,
dry_run: bool,
) -> Result<(u64), Error> {
let mut total_disk_space = 0;
let mut target_path = path.to_path_buf();
target_path.push("target/");
target_path.push("target");
for entry in WalkDir::new(target_path.to_str().unwrap())
.min_depth(1)
.contents_first(true)
Expand All @@ -118,7 +120,7 @@ fn try_clean_path<'a>(
total_disk_space += metadata.len();
if !dry_run {
match remove_file(entry.path()) {
Ok(_) => info!("Successfuly removed: {:?}", entry.path()),
Ok(_) => info!("Successfully removed: {:?}", entry.path()),
Err(e) => warn!("Failed to remove: {:?} {}", entry.path(), e),
};
} else {
Expand All @@ -130,6 +132,7 @@ fn try_clean_path<'a>(
Ok(total_disk_space)
}

#[allow(clippy::cyclomatic_complexity)]
fn main() {
let matches = App::new("Cargo sweep")
.version("0.1")
Expand Down Expand Up @@ -164,17 +167,30 @@ fn main() {
.long("file")
.help("Load timestamp file in the given path, cleaning everything older"),
)
.arg(
Arg::with_name("installed")
.short("i")
.long("installed")
.help("Keep only artefacts made by Toolchains currently installed by rustup")
)
.arg(
Arg::with_name("toolchains")
.long("toolchains")
.value_name("toolchains")
.help("Toolchains (currently installed by rustup) that shuld have there artefacts kept.")
.takes_value(true),
)
.arg(
Arg::with_name("time")
.short("t")
.long("time")
.value_name("days")
.help("Number of days to backwards to keep")
.help("Number of days backwards to keep. If no value is set uses 30.")
.takes_value(true),
)
.group(
ArgGroup::with_name("timestamp")
.args(&["stamp", "file", "time"])
.args(&["stamp", "file", "time", "installed", "toolchains"])
.required(true),
)
.arg(
Expand Down Expand Up @@ -216,6 +232,30 @@ fn main() {
Ok(_) => {}
Err(e) => error!("Failed to write timestamp file: {}", e),
}
return;
}

if matches.is_present("installed") || matches.is_present("toolchains") {
if matches.is_present("recursive") {
for project_path in find_cargo_projects(&path) {
match remove_not_built_with(&project_path, matches.value_of("toolchains"), dry_run) {
Ok(cleaned_amount) if dry_run => {
info!("Would clean: {}", format_bytes(cleaned_amount))
}
Ok(cleaned_amount) => info!("Cleaned {}", format_bytes(cleaned_amount)),
Err(e) => error!("Failed to clean {:?}: {}", path, e),
};
}
} else {
match remove_not_built_with(&path, matches.value_of("toolchains"), dry_run) {
Ok(cleaned_amount) if dry_run => {
info!("Would clean: {}", format_bytes(cleaned_amount))
}
Ok(cleaned_amount) => info!("Cleaned {}", format_bytes(cleaned_amount)),
Err(e) => error!("Failed to clean {:?}: {}", path, e),
};
}
return;
}

if matches.is_present("recursive") {
Expand Down
249 changes: 249 additions & 0 deletions src/rust_version.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
#![allow(deprecated)]
use failure::{bail, Error};
use serde_json::from_str;
use std::collections::HashSet;
use std::hash::{Hash, Hasher, SipHasher};
use std::process::Command;
use std::{
fs::{self, remove_dir_all, remove_file, File},
io::prelude::*,
path::Path,
};
use walkdir::{DirEntry, WalkDir};

/// Hashes `hashable` with a `SipHasher` whose two keys are both zero.
///
/// This has to match the way Cargo hashes a rustc version, which is why the
/// implementation is a copy of Cargo's own helper rather than something built
/// on the default hasher.
fn hash_u64<H: Hash>(hashable: &H) -> u64 {
    let mut state = SipHasher::new_with_keys(0, 0);
    Hash::hash(hashable, &mut state);
    Hasher::finish(&state)
}

/// This has to match the way Cargo stores a rustc version in a fingerprint file.
///
/// Values of this type are deserialized from the `json` files found inside a
/// fingerprint directory.
#[derive(Deserialize, Debug)]
struct Fingerprint {
/// Hash of the rustc version stored in the fingerprint; it is looked up in
/// the set of toolchain hashes that should be kept.
rustc: u64,
}

/// Extracts the fingerprint hash embedded in a file or folder name.
///
/// The files and folders tracked by fingerprint have the form
/// `({prefix}-)?{name}-{16 char hex hash}(.{extension})?`.
/// Returns `Some({hex hash})` if `filename` is of that form and `None` otherwise.
fn hash_from_path_name(filename: &str) -> Option<&str> {
    // Everything from the first `.` onwards is extension and is ignored.
    let stem = match filename.find('.') {
        Some(dot) => &filename[..dot],
        None => filename,
    };
    // Without a dash there is no `-{hash}` suffix, so it can't be a fingerprint matched file.
    let dash = stem.rfind('-')?;
    let candidate = &stem[dash + 1..];
    // Anything that is not exactly 16 hex characters is just a project with an
    // unfortunate name, not a fingerprint hash.
    let looks_like_hash = candidate.len() == 16 && candidate.chars().all(|c| c.is_digit(16));
    if looks_like_hash {
        Some(candidate)
    } else {
        None
    }
}

impl Fingerprint {
    /// Attempts to load the Fingerprint data for a given fingerprint directory.
    ///
    /// Walks the entries of `fingerprint_dir` and returns the first file with a
    /// `json` extension that deserializes into a `Fingerprint`.
    ///
    /// # Errors
    ///
    /// Fails if the directory cannot be listed, if a `json` file cannot be
    /// opened or read as text, or if no `json` file in the directory parses.
    fn load(fingerprint_dir: &Path) -> Result<Self, Error> {
        for entry in fs::read_dir(fingerprint_dir)? {
            let path = entry?.path();
            if path.extension().map_or(false, |ext| ext == "json") {
                let mut contents = String::new();
                File::open(&path)?.read_to_string(&mut contents)?;
                // A `json` file that does not parse is skipped rather than
                // reported, so that a later file in the directory can still match.
                if let Ok(fing) = from_str(&contents) {
                    return Ok(fing);
                }
            }
        }
        bail!("did not find a fingerprint file in {:?}", fingerprint_dir)
    }
}

/// Collects the hashes of every fingerprint that should survive the sweep.
///
/// Each sub-directory of `fingerprint_dir` is loaded as a `Fingerprint`; its
/// name hash is kept when the recorded rustc hash is in `instaled_rustc`, or
/// when no fingerprint data could be loaded at all.
///
/// # Panics
///
/// Panics if `fingerprint_dir` is not a path whose last component is
/// `.fingerprint`.
fn load_all_fingerprint(
    fingerprint_dir: &Path,
    instaled_rustc: &HashSet<u64>,
) -> Result<HashSet<String>, Error> {
    assert_eq!(
        fingerprint_dir
            .file_name()
            .expect("load takes the path to a .fingerprint directory"),
        ".fingerprint"
    );
    let mut keep = HashSet::new();
    for entry in fs::read_dir(fingerprint_dir)? {
        let path = entry?.path();
        if !path.is_dir() {
            continue;
        }
        // We default to keeping, as there are files that don't have the data we need.
        let built_with_kept_rustc = match Fingerprint::load(&path) {
            Ok(fingerprint) => instaled_rustc.contains(&fingerprint.rustc),
            Err(_) => true,
        };
        if !built_with_kept_rustc {
            continue;
        }
        let name = path.file_name().unwrap().to_string_lossy();
        if let Some(hash) = hash_from_path_name(&name) {
            keep.insert(hash.to_string());
        }
    }
    debug!("Hashs to keep: {:#?}", keep);
    Ok(keep)
}

/// Sums the sizes of all regular files found by walking `dir`.
///
/// Entries that cannot be read, or whose metadata cannot be fetched, are
/// silently left out of the total.
fn total_disk_space_dir(dir: &Path) -> u64 {
    let mut total = 0;
    for entry in WalkDir::new(dir) {
        if let Ok(entry) = entry {
            if let Ok(metadata) = entry.metadata() {
                if metadata.is_file() {
                    total += metadata.len();
                }
            }
        }
    }
    total
}

/// Removes the direct children of `dir` whose name carries a fingerprint hash
/// that is not in `keep`.
///
/// Entries without a hash in their name are never touched. With `dry_run` set
/// nothing is deleted and the candidates are only logged. A failed deletion is
/// logged as a warning and does not abort the run.
///
/// Returns the number of bytes taken up by the entries selected for removal.
fn remove_not_matching_in_a_dir(
    dir: &Path,
    keep: &HashSet<String>,
    dry_run: bool,
) -> Result<u64, Error> {
    let mut reclaimed = 0;
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let metadata = entry.metadata()?;
        let path = entry.path();
        let name = path
            .file_name()
            .expect("folders in a directory dont have a name!?")
            .to_string_lossy();
        let stale = match hash_from_path_name(&name) {
            Some(hash) => !keep.contains(hash),
            None => false,
        };
        if !stale {
            continue;
        }
        // Only plain files and directories are handled; anything else is left alone.
        let is_file = path.is_file();
        if !is_file && !path.is_dir() {
            continue;
        }
        // The size is measured before deleting, while the entry still exists.
        reclaimed += if is_file {
            metadata.len()
        } else {
            total_disk_space_dir(&path)
        };
        if dry_run {
            info!("Would remove: {:?}", &path);
            continue;
        }
        let outcome = if is_file {
            remove_file(&path)
        } else {
            remove_dir_all(&path)
        };
        match outcome {
            Ok(_) => info!("Successfully removed: {:?}", &path),
            Err(e) => warn!("Failed to remove: {:?} {}", &path, e),
        };
    }
    Ok(reclaimed)
}

/// Sweeps one profile directory (the parent of a `.fingerprint` directory).
///
/// The hashes to keep are derived from the fingerprints whose rustc hash is in
/// `hashed_rust_vertion_to_keep`; every hash-named entry in the fingerprint,
/// `build`, `deps` and `native` directories, and in `dir` itself, that is not
/// in that set is removed (or only reported when `dry_run` is set).
///
/// Returns the number of bytes taken up by the entries selected for removal.
///
/// # Errors
///
/// Fails if the `.fingerprint` directory or `dir` cannot be read. A missing
/// `build`, `deps` or `native` sub-directory is not an error: there is simply
/// nothing to clean in it.
fn remove_not_built_with_in_a_profile(
    dir: &Path,
    hashed_rust_vertion_to_keep: &HashSet<u64>,
    dry_run: bool,
) -> Result<u64, Error> {
    let fingerprint_dir = dir.join(".fingerprint");
    let keep = load_all_fingerprint(&fingerprint_dir, hashed_rust_vertion_to_keep)?;
    let mut total_disk_space = remove_not_matching_in_a_dir(&fingerprint_dir, &keep, dry_run)?;
    // examples is just final artifacts not tracked by fingerprint so skip that one.
    // incremental is not tracked by fingerprint so skip that one.
    for sub_dir_name in &["build", "deps", "native"] {
        let sub_dir = dir.join(sub_dir_name);
        // A profile directory is not guaranteed to contain each of these (newer
        // Cargo no longer creates `native`, for instance); skipping a missing one
        // keeps the whole sweep from failing on the directory listing.
        if sub_dir.is_dir() {
            total_disk_space += remove_not_matching_in_a_dir(&sub_dir, &keep, dry_run)?;
        }
    }
    total_disk_space += remove_not_matching_in_a_dir(dir, &keep, dry_run)?;
    Ok(total_disk_space)
}

/// Finds every directory named `.fingerprint` below `dir`.
///
/// Entries that cannot be read while walking are skipped. Only directories are
/// yielded: callers list the contents of each result, so a stray regular file
/// that merely shares the name would make the whole sweep fail.
fn lookup_all_fingerprint_dirs(dir: &Path) -> impl Iterator<Item = DirEntry> {
    WalkDir::new(dir)
        .min_depth(1)
        .into_iter()
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.file_name() == ".fingerprint" && entry.path().is_dir())
}

fn lookup_from_names<'a>(iter: impl Iterator<Item = &'a str>) -> Result<HashSet<u64>, Error> {
iter.map(|x| {
let plus_name = "+".to_owned() + x;
let out = Command::new("rustc").args(&[&plus_name, "-vV"]).output()?;
if !out.status.success() {
bail!(String::from_utf8_lossy(&out.stdout).to_string());
}
Ok(hash_u64(&String::from_utf8_lossy(&out.stdout)))
})
.chain(
// Some fingerprints made to track the output of build scripts claim to have been built with a rust that hashes to 0.
// This can be fixed in cargo, but for now this makes sure we don't clean the files.
Some(Ok(0)),
)
.collect()
}

/// Lists the toolchain names printed by `rustup toolchain list`.
///
/// Only the first whitespace-separated word of each output line is kept, which
/// drops trailing annotations after the name; blank lines are skipped.
///
/// # Errors
///
/// Fails if `rustup` cannot be started or exits unsuccessfully; in the latter
/// case the error carries what the command wrote to its standard error stream.
fn rustup_toolchain_list() -> Result<Vec<String>, Error> {
    let out = Command::new("rustup")
        .args(&["toolchain", "list"])
        .output()?;
    if !out.status.success() {
        // Diagnostics are written to stderr; stdout is normally empty on failure.
        bail!(
            "`rustup toolchain list` failed: {}",
            String::from_utf8_lossy(&out.stderr).trim()
        );
    }
    Ok(String::from_utf8_lossy(&out.stdout)
        .lines()
        .filter_map(|line| line.split_whitespace().next())
        .map(|name| name.to_owned())
        .collect())
}

/// Removes the build artifacts under `dir`'s `target` directory that were not
/// produced by one of the toolchains to keep.
///
/// `rust_vertion_to_keep` is a comma separated list of toolchain names; when it
/// is `None`, every toolchain reported by `rustup toolchain list` is kept.
/// With `dry_run` set nothing is deleted and candidates are only logged.
///
/// Returns the number of bytes taken up by the entries selected for removal.
pub fn remove_not_built_with(
    dir: &Path,
    rust_vertion_to_keep: Option<&str>,
    dry_run: bool,
) -> Result<u64, Error> {
    let hashes_to_keep = match rust_vertion_to_keep {
        Some(names) => {
            let requested: Vec<_> = names.split(',').collect();
            info!("Using specified installed toolchains: {:?}", requested);
            lookup_from_names(requested.into_iter())?
        }
        None => {
            let installed = rustup_toolchain_list()?;
            info!("Using all installed toolchains: {:?}", installed);
            lookup_from_names(installed.iter().map(String::as_str))?
        }
    };
    let mut total_disk_space = 0;
    for fing in lookup_all_fingerprint_dirs(&dir.join("target")) {
        // The profile directory is the parent of each `.fingerprint` directory.
        let fingerprint_dir = fing.into_path();
        total_disk_space += remove_not_built_with_in_a_profile(
            fingerprint_dir.parent().unwrap(),
            &hashes_to_keep,
            dry_run,
        )?;
    }
    Ok(total_disk_space)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: listing the toolchains succeeds. Shells out to `rustup`.
    #[test]
    fn test_rustup_toolchain_list_runs() {
        let listed = rustup_toolchain_list();
        listed.unwrap();
    }

    /// Smoke test: every listed toolchain can be resolved to a hash.
    /// Shells out to `rustup` and `rustc`.
    #[test]
    fn test_lookup_from_rustup_runs() {
        let toolchains = rustup_toolchain_list().unwrap();
        let names = toolchains.iter().map(|x| x.as_str());
        lookup_from_names(names).unwrap();
    }
}

0 comments on commit 9e69e42

Please sign in to comment.