From 5cca4e8c2496a1925a549c26c02ad11dd105c7a4 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 8 Jan 2018 09:38:40 -0800 Subject: [PATCH] Leverage local links on git checkouts This commit updates the handling of git checkouts from the database to use hardlinks if possible, speeding up this operation for large repositories significantly. As a refresher, Cargo caches git repositories in a few locations to speed up local usage of git repositories. Cargo has a "database" folder which is a bare checkout of any git repository Cargo has cached historically. This database folder contains effectively a bunch of databases for remote repos that are updated periodically. When actually building a crate Cargo will clone this database into a different location, the checkouts folder. Each rev we build (ever) is cached in the checkouts folder. This means that once a checkout directory is created it's frozen for all of time. This latter step is what this commit is optimizing: checking out the database onto the local filesystem at a particular revision. Previously we were instructing libgit2 to fall back to a "git aware" transport which was exceedingly slow on some systems for filesystem-to-filesystem transfers. This optimization (we just forgot to turn it on in libgit2) is a longstanding one and should speed this up significantly! 
Closes #4604 --- Cargo.toml | 2 +- src/cargo/sources/git/source.rs | 6 +-- src/cargo/sources/git/utils.rs | 90 ++++++++++++++++++++++++--------- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 29f3055721a..5f5361e2ef2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ failure = "0.1.1" filetime = "0.1" flate2 = "1.0" fs2 = "0.4" -git2 = "0.6" +git2 = "0.6.11" git2-curl = "0.7" glob = "0.2" hex = "0.3" diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index a07782b4fdf..058c8e9113f 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -159,7 +159,7 @@ impl<'cfg> Source for GitSource<'cfg> { let should_update = actual_rev.is_err() || self.source_id.precise().is_none(); - let (repo, actual_rev) = if should_update { + let (db, actual_rev) = if should_update { self.config.shell().status("Updating", format!("git repository `{}`", self.remote.url()))?; @@ -175,7 +175,7 @@ impl<'cfg> Source for GitSource<'cfg> { // Don’t use the full hash, // to contribute less to reaching the path length limit on Windows: // https://github.com/servo/servo/pull/14397 - let short_id = repo.to_short_id(actual_rev.clone()).unwrap(); + let short_id = db.to_short_id(actual_rev.clone()).unwrap(); let checkout_path = lock.parent().join("checkouts") .join(&self.ident).join(short_id.as_str()); @@ -185,7 +185,7 @@ impl<'cfg> Source for GitSource<'cfg> { // in scope so the destructors here won't tamper with too much. // Checkout is immutable, so we don't need to protect it with a lock once // it is created. 
- repo.copy_to(actual_rev.clone(), &checkout_path, self.config)?; + db.copy_to(actual_rev.clone(), &checkout_path, self.config)?; let source_id = self.source_id.with_precise(Some(actual_rev.to_string())); let path_source = PathSource::new_recursive(&checkout_path, diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index f29bdb8fdb9..7c32424267d 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -12,7 +12,7 @@ use url::Url; use core::GitReference; use util::{ToUrl, internal, Config, network, Progress}; -use util::errors::{CargoResult, CargoResultExt, CargoError, Internal}; +use util::errors::{CargoResult, CargoResultExt, Internal}; #[derive(PartialEq, Clone, Debug)] pub struct GitRevision(git2::Oid); @@ -226,14 +226,43 @@ impl<'a> GitCheckout<'a> { fs::create_dir_all(&dirname).chain_err(|| { format!("Couldn't mkdir {}", dirname.display()) })?; - if fs::metadata(&into).is_ok() { + if into.exists() { fs::remove_dir_all(into).chain_err(|| { format!("Couldn't rmdir {}", into.display()) })?; } - let repo = git2::Repository::init(into)?; - let mut checkout = GitCheckout::new(into, database, revision, repo); - checkout.fetch(config)?; + + // we're doing a local filesystem-to-filesystem clone so there should + // be no need to respect global configuration options, so pass in + // an empty instance of `git2::Config` below. + let git_config = git2::Config::new()?; + + // Clone the repository, but make sure we use the "local" option in + // libgit2 which will attempt to use hardlinks to set up the database. + // This should speed up the clone operation quite a bit if it works. + // + // Note that we still use the same fetch options because while we don't + // need authentication information we may want progress bars and such. 
+ let url = database.path.to_url()?; + let mut repo = None; + with_fetch_options(&git_config, &url, config, &mut |fopts| { + let mut checkout = git2::build::CheckoutBuilder::new(); + checkout.dry_run(); // we'll do this below during a `reset` + + let r = git2::build::RepoBuilder::new() + // use hard links and/or copy the database, we're doing a + // filesystem clone so this'll speed things up quite a bit. + .clone_local(git2::build::CloneLocal::Local) + .with_checkout(checkout) + .fetch_options(fopts) + // .remote_create(|repo, _name, url| repo.remote_anonymous(url)) + .clone(url.as_str(), into)?; + repo = Some(r); + Ok(()) + })?; + let repo = repo.unwrap(); + + let checkout = GitCheckout::new(into, database, revision, repo); checkout.reset(config)?; Ok(checkout) } @@ -242,7 +271,7 @@ impl<'a> GitCheckout<'a> { match self.repo.revparse_single("HEAD") { Ok(ref head) if head.id() == self.revision.0 => { // See comments in reset() for why we check this - fs::metadata(self.location.join(".cargo-ok")).is_ok() + self.location.join(".cargo-ok").exists() } _ => false, } @@ -555,6 +584,33 @@ fn reset(repo: &git2::Repository, Ok(()) } +pub fn with_fetch_options(git_config: &git2::Config, + url: &Url, + config: &Config, + cb: &mut FnMut(git2::FetchOptions) -> CargoResult<()>) + -> CargoResult<()> +{ + let mut progress = Progress::new("Fetch", config); + network::with_retry(config, || { + with_authentication(url.as_str(), git_config, |f| { + let mut rcb = git2::RemoteCallbacks::new(); + rcb.credentials(f); + + rcb.transfer_progress(|stats| { + progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok() + }); + + // Create a local anonymous remote in the repository to fetch the + // url + let mut opts = git2::FetchOptions::new(); + opts.remote_callbacks(rcb) + .download_tags(git2::AutotagOption::All); + cb(opts) + })?; + Ok(()) + }) +} + pub fn fetch(repo: &mut git2::Repository, url: &Url, refspec: &str, @@ -585,26 +641,10 @@ pub fn fetch(repo: &mut git2::Repository, 
maybe_gc_repo(repo)?; debug!("doing a fetch for {}", url); - let mut progress = Progress::new("Fetch", config); - with_authentication(url.as_str(), &repo.config()?, |f| { - let mut cb = git2::RemoteCallbacks::new(); - cb.credentials(f); - - cb.transfer_progress(|stats| { - progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok() - }); - - // Create a local anonymous remote in the repository to fetch the url + with_fetch_options(&repo.config()?, url, config, &mut |mut opts| { + debug!("initiating fetch of {} from {}", refspec, url); let mut remote = repo.remote_anonymous(url.as_str())?; - let mut opts = git2::FetchOptions::new(); - opts.remote_callbacks(cb) - .download_tags(git2::AutotagOption::All); - - network::with_retry(config, || { - debug!("initiating fetch of {} from {}", refspec, url); - remote.fetch(&[refspec], Some(&mut opts), None) - .map_err(CargoError::from) - })?; + remote.fetch(&[refspec], Some(&mut opts), None)?; Ok(()) }) }