Skip to content

Commit

Permalink
Auto merge of #4919 - alexcrichton:faster-git-clone, r=matklad
Browse files Browse the repository at this point in the history
Leverage local links on git checkouts

This commit updates the handling of git checkouts from the database to use
hardlinks if possible, speeding up this operation for large repositories
significantly.

As a refresher, Cargo caches git repositories in a few locations to speed up
local usage of git repositories. Cargo has a "database" folder which is a bare
checkout of any git repository Cargo has cached historically. This database
folder contains effectively a bunch of databases for remote repos that are
updated periodically.

When actually building a crate Cargo will clone this database into a different
location, the checkouts folder. Each rev we build (ever) is cached in the
checkouts folder. This means that once a checkout directory is created it's
frozen for all of time.

This latter step is what this commit is optimizing: checking out the
database onto the local filesystem at a particular revision. Previously we were
instructing libgit2 to fall back to a "git aware" transport which was
exceedingly slow on some systems for filesystem-to-filesystem transfers. This
optimization (we just forgot to turn it on in libgit2) is a longstanding one and
should speed this up significantly!

Closes #4604
  • Loading branch information
bors committed Jan 8, 2018
2 parents 93bf2a3 + 5cca4e8 commit c4003c4
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ failure = "0.1.1"
filetime = "0.1"
flate2 = "1.0"
fs2 = "0.4"
git2 = "0.6"
git2 = "0.6.11"
git2-curl = "0.7"
glob = "0.2"
hex = "0.3"
Expand Down
6 changes: 3 additions & 3 deletions src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ impl<'cfg> Source for GitSource<'cfg> {
let should_update = actual_rev.is_err() ||
self.source_id.precise().is_none();

let (repo, actual_rev) = if should_update {
let (db, actual_rev) = if should_update {
self.config.shell().status("Updating",
format!("git repository `{}`", self.remote.url()))?;

Expand All @@ -175,7 +175,7 @@ impl<'cfg> Source for GitSource<'cfg> {
// Don’t use the full hash,
// to contribute less to reaching the path length limit on Windows:
// https://github.com/servo/servo/pull/14397
let short_id = repo.to_short_id(actual_rev.clone()).unwrap();
let short_id = db.to_short_id(actual_rev.clone()).unwrap();

let checkout_path = lock.parent().join("checkouts")
.join(&self.ident).join(short_id.as_str());
Expand All @@ -185,7 +185,7 @@ impl<'cfg> Source for GitSource<'cfg> {
// in scope so the destructors here won't tamper with too much.
// Checkout is immutable, so we don't need to protect it with a lock once
// it is created.
repo.copy_to(actual_rev.clone(), &checkout_path, self.config)?;
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;

let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));
let path_source = PathSource::new_recursive(&checkout_path,
Expand Down
90 changes: 65 additions & 25 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use url::Url;

use core::GitReference;
use util::{ToUrl, internal, Config, network, Progress};
use util::errors::{CargoResult, CargoResultExt, CargoError, Internal};
use util::errors::{CargoResult, CargoResultExt, Internal};

#[derive(PartialEq, Clone, Debug)]
pub struct GitRevision(git2::Oid);
Expand Down Expand Up @@ -226,14 +226,43 @@ impl<'a> GitCheckout<'a> {
fs::create_dir_all(&dirname).chain_err(|| {
format!("Couldn't mkdir {}", dirname.display())
})?;
if fs::metadata(&into).is_ok() {
if into.exists() {
fs::remove_dir_all(into).chain_err(|| {
format!("Couldn't rmdir {}", into.display())
})?;
}
let repo = git2::Repository::init(into)?;
let mut checkout = GitCheckout::new(into, database, revision, repo);
checkout.fetch(config)?;

// we're doing a local filesystem-to-filesystem clone so there should
// be no need to respect global configuration options, so pass in
// an empty instance of `git2::Config` below.
let git_config = git2::Config::new()?;

// Clone the repository, but make sure we use the "local" option in
// libgit2 which will attempt to use hardlinks to set up the database.
// This should speed up the clone operation quite a bit if it works.
//
// Note that we still use the same fetch options because while we don't
// need authentication information we may want progress bars and such.
let url = database.path.to_url()?;
let mut repo = None;
with_fetch_options(&git_config, &url, config, &mut |fopts| {
let mut checkout = git2::build::CheckoutBuilder::new();
checkout.dry_run(); // we'll do this below during a `reset`

let r = git2::build::RepoBuilder::new()
// use hard links and/or copy the database, we're doing a
// filesystem clone so this'll speed things up quite a bit.
.clone_local(git2::build::CloneLocal::Local)
.with_checkout(checkout)
.fetch_options(fopts)
// .remote_create(|repo, _name, url| repo.remote_anonymous(url))
.clone(url.as_str(), into)?;
repo = Some(r);
Ok(())
})?;
let repo = repo.unwrap();

let checkout = GitCheckout::new(into, database, revision, repo);
checkout.reset(config)?;
Ok(checkout)
}
Expand All @@ -242,7 +271,7 @@ impl<'a> GitCheckout<'a> {
match self.repo.revparse_single("HEAD") {
Ok(ref head) if head.id() == self.revision.0 => {
// See comments in reset() for why we check this
fs::metadata(self.location.join(".cargo-ok")).is_ok()
self.location.join(".cargo-ok").exists()
}
_ => false,
}
Expand Down Expand Up @@ -555,6 +584,33 @@ fn reset(repo: &git2::Repository,
Ok(())
}

/// Builds a `git2::FetchOptions` wired up with credential and progress
/// callbacks, then hands it to `cb`.
///
/// * `git_config` - git configuration forwarded to `with_authentication`.
/// * `url` - URL being fetched from; passed (as a string) to
///   `with_authentication`.
/// * `config` - Cargo configuration used for the "Fetch" progress display and
///   for `network::with_retry`.
/// * `cb` - receives the freshly built options; its result is propagated.
///
/// Returns `Ok(())` when the authenticated call succeeds, otherwise the error
/// is propagated through `network::with_retry`.
///
/// NOTE(review): `cb` runs inside both `network::with_retry` and
/// `with_authentication`, so it is presumably invoked again on a retry or a
/// new credential attempt -- confirm against those helpers before relying on
/// `cb` running exactly once.
pub fn with_fetch_options(git_config: &git2::Config,
url: &Url,
config: &Config,
cb: &mut FnMut(git2::FetchOptions) -> CargoResult<()>)
-> CargoResult<()>
{
// A single progress display is created up front and shared by every
// invocation of the closures below.
let mut progress = Progress::new("Fetch", config);
network::with_retry(config, || {
with_authentication(url.as_str(), git_config, |f| {
let mut rcb = git2::RemoteCallbacks::new();
// `f` is the credentials callback supplied by `with_authentication`.
rcb.credentials(f);

// Report indexed vs. total objects; the callback's boolean result is
// whether the progress tick succeeded.
// NOTE(review): git2 presumably treats `false` as a request to cancel
// the transfer -- confirm against the git2 documentation.
rcb.transfer_progress(|stats| {
progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok()
});

// Assemble the fetch options (callbacks plus "download all tags") and
// let the caller perform the actual fetch or clone with them.
let mut opts = git2::FetchOptions::new();
opts.remote_callbacks(rcb)
.download_tags(git2::AutotagOption::All);
cb(opts)
})?;
Ok(())
})
}

pub fn fetch(repo: &mut git2::Repository,
url: &Url,
refspec: &str,
Expand Down Expand Up @@ -585,26 +641,10 @@ pub fn fetch(repo: &mut git2::Repository,
maybe_gc_repo(repo)?;

debug!("doing a fetch for {}", url);
let mut progress = Progress::new("Fetch", config);
with_authentication(url.as_str(), &repo.config()?, |f| {
let mut cb = git2::RemoteCallbacks::new();
cb.credentials(f);

cb.transfer_progress(|stats| {
progress.tick(stats.indexed_objects(), stats.total_objects()).is_ok()
});

// Create a local anonymous remote in the repository to fetch the url
with_fetch_options(&repo.config()?, url, config, &mut |mut opts| {
debug!("initiating fetch of {} from {}", refspec, url);
let mut remote = repo.remote_anonymous(url.as_str())?;
let mut opts = git2::FetchOptions::new();
opts.remote_callbacks(cb)
.download_tags(git2::AutotagOption::All);

network::with_retry(config, || {
debug!("initiating fetch of {} from {}", refspec, url);
remote.fetch(&[refspec], Some(&mut opts), None)
.map_err(CargoError::from)
})?;
remote.fetch(&[refspec], Some(&mut opts), None)?;
Ok(())
})
}
Expand Down

0 comments on commit c4003c4

Please sign in to comment.