From 175926397e3d8d41442d5e8704d41c42da2c0528 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 2 Dec 2022 09:21:42 +0100 Subject: [PATCH] support minimal 'fetch' using `gitoxide` This most notably excludes: * progress * SSH name guessing * retry on spurious timeout/connection issues --- Cargo.toml | 1 + src/cargo/sources/git/mod.rs | 1 + src/cargo/sources/git/oxide.rs | 69 +++++++++++++++ src/cargo/sources/git/utils.rs | 155 ++++++++++++++++++++++++--------- 4 files changed, 185 insertions(+), 41 deletions(-) create mode 100644 src/cargo/sources/git/oxide.rs diff --git a/Cargo.toml b/Cargo.toml index d3352269b625..2cc8e5662889 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ filetime = "0.2.9" flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] } git2 = "0.15.0" git2-curl = "0.16.0" +git-repository = { version = "0.29.0", features = ["blocking-http-transport-curl"] } glob = "0.3.0" hex = "0.4" home = "0.5" diff --git a/src/cargo/sources/git/mod.rs b/src/cargo/sources/git/mod.rs index b32dbb17be19..812ec094a279 100644 --- a/src/cargo/sources/git/mod.rs +++ b/src/cargo/sources/git/mod.rs @@ -1,4 +1,5 @@ pub use self::source::GitSource; pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote}; +mod oxide; mod source; mod utils; diff --git a/src/cargo/sources/git/oxide.rs b/src/cargo/sources/git/oxide.rs new file mode 100644 index 000000000000..b5a45d90f147 --- /dev/null +++ b/src/cargo/sources/git/oxide.rs @@ -0,0 +1,69 @@ +//! This module contains all code sporting `gitoxide` for operations on `git` repositories and it mirrors +//! `utils` closely for now. One day it can be renamed into `utils` once `git2` isn't required anymore. + +use crate::util::{network, Progress}; +use crate::{CargoResult, Config}; +use git_repository as git; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; +use std::time::Duration; + +/// For the time being, `repo_path` makes it easy to instantiate a gitoxide repo just for fetching. 
+/// In future this may change to be the gitoxide repository itself. +pub fn with_retry_and_progress( + repo_path: &std::path::Path, + config: &Config, + cb: &mut (dyn FnMut( + &git::Repository, + &AtomicBool, + &mut git::progress::tree::Item, + ) -> CargoResult<()> + + Send), +) -> CargoResult<()> { + let repo = git::open_opts(repo_path, { + let mut opts = git::open::Options::default(); + // We need `git_binary` configuration as well for being able to see credential helpers + // that are configured with the `git` installation itself. + // However, this is slow on windows (~150ms) and most people won't need it as they use the + // standard index which won't ever need authentication. + // TODO: This is certainly something to make configurable, at the very least on windows. + // Maybe it's also something that could be cached, all we need is the path to the configuration file + // which usually doesn't change unless the installation changes. Maybe something keyed by the location of the + // binary along with its fingerprint. + opts.permissions.config = git::permissions::Config::all(); + opts + })?; + + let progress_root: Arc<git::progress::tree::Root> = git::progress::tree::root::Options { + initial_capacity: 10, + message_buffer_capacity: 10, + } + .into(); + let mut progress = progress_root.add_child("operation"); + + // For decent interrupts of long-running computations and removal of temp files we should handle interrupts, and this + // is an easy way to do that. We will remove them later. + // We intentionally swallow errors here as if for some reason we can't register handlers, `cargo` will just work like before and + // abort on signals. 
+ let _deregister_signal_handlers_on_drop = git::interrupt::init_handler(|| {}) + .ok() + .unwrap_or_default() + .auto_deregister(); + let should_interrupt = AtomicBool::new(false); + let _progress_bar = Progress::new("Fetch", config); + std::thread::scope(move |s| { + s.spawn({ + let root = Arc::downgrade(&progress_root); + move || -> CargoResult<()> { + let mut tasks = Vec::with_capacity(10); + while let Some(root) = root.upgrade() { + root.sorted_snapshot(&mut tasks); + // dbg!(&tasks); + std::thread::sleep(Duration::from_millis(300)); + } + Ok(()) + } + }); + network::with_retry(config, || cb(&repo, &should_interrupt, &mut progress)) + }) +} diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 623caceb803a..4ed04a1d339b 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -2,6 +2,7 @@ //! authentication/cloning. use crate::core::GitReference; +use crate::sources::git::oxide; use crate::util::errors::CargoResult; use crate::util::{human_readable_bytes, network, Config, IntoUrl, MetricsCounter, Progress}; use anyhow::{anyhow, Context as _}; @@ -855,51 +856,123 @@ pub fn fetch( if let Some(true) = config.net_config()?.git_fetch_with_cli { return fetch_with_cli(repo, url, &refspecs, tags, config); } - - debug!("doing a fetch for {}", url); - let git_config = git2::Config::open_default()?; - with_fetch_options(&git_config, url, config, &mut |mut opts| { - if tags { - opts.download_tags(git2::AutotagOption::All); - } - // The `fetch` operation here may fail spuriously due to a corrupt - // repository. It could also fail, however, for a whole slew of other - // reasons (aka network related reasons). We want Cargo to automatically - // recover from corrupt repositories, but we don't want Cargo to stomp - // over other legitimate errors. - // - // Consequently we save off the error of the `fetch` operation and if it - // looks like a "corrupt repo" error then we blow away the repo and try - // again. 
If it looks like any other kind of error, or if we've already - // blown away the repository, then we want to return the error as-is. - let mut repo_reinitialized = false; - loop { - debug!("initiating fetch of {:?} from {}", refspecs, url); - let res = repo - .remote_anonymous(url)? - .fetch(&refspecs, Some(&mut opts), None); - let err = match res { - Ok(()) => break, - Err(e) => e, - }; - debug!("fetch failed: {}", err); - - if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb) - { - repo_reinitialized = true; - debug!( - "looks like this is a corrupt repository, reinitializing \ + if config + .cli_unstable() + .gitoxide + .map_or(false, |git| git.fetch) + { + use git::remote::fetch::Error; + use git_repository as git; + let git2_repo = repo; + oxide::with_retry_and_progress( + &git2_repo.path().to_owned(), + config, + &mut |repo, should_interrupt, progress| { + // The `fetch` operation here may fail spuriously due to a corrupt + // repository. It could also fail, however, for a whole slew of other + // reasons (aka network related reasons). We want Cargo to automatically + // recover from corrupt repositories, but we don't want Cargo to stomp + // over other legitimate errors. + // + // Consequently we save off the error of the `fetch` operation and if it + // looks like a "corrupt repo" error then we blow away the repo and try + // again. If it looks like any other kind of error, or if we've already + // blown away the repository, then we want to return the error as-is. + let mut repo_reinitialized = false; + let mut repo_storage; + let mut repo = &*repo; + loop { + debug!("initiating fetch of {:?} from {}", refspecs, url); + let res = repo + .remote_at(url)? + .with_refspecs( + refspecs.iter().map(|s| s.as_str()), + git::remote::Direction::Fetch, + )? + .connect(git::remote::Direction::Fetch, progress.add_child("fetch"))? + .prepare_fetch(git::remote::ref_map::Options::default())? 
+ .receive(should_interrupt); + let err = match res { + Ok(_) => break, + Err(e) => e, + }; + debug!("fetch failed: {}", err); + + if !repo_reinitialized + && matches!( + err, + Error::Configuration { .. } + | Error::IncompatibleObjectHash { .. } + | Error::WritePack(_) + | Error::UpdateRefs(_) + | Error::RemovePackKeepFile { .. } + ) + { + repo_reinitialized = true; + debug!( + "looks like this is a corrupt repository, reinitializing \ and trying again" - ); - if reinitialize(repo).is_ok() { - continue; + ); + if reinitialize(git2_repo).is_ok() { + repo_storage = + git::open_opts(repo.path(), repo.open_options().to_owned())?; + repo = &repo_storage; + continue; + } + } + + return Err(err.into()); } + Ok(()) + }, + ) + } else { + debug!("doing a fetch for {}", url); + let git_config = git2::Config::open_default()?; + with_fetch_options(&git_config, url, config, &mut |mut opts| { + if tags { + opts.download_tags(git2::AutotagOption::All); } + // The `fetch` operation here may fail spuriously due to a corrupt + // repository. It could also fail, however, for a whole slew of other + // reasons (aka network related reasons). We want Cargo to automatically + // recover from corrupt repositories, but we don't want Cargo to stomp + // over other legitimate errors. + // + // Consequently we save off the error of the `fetch` operation and if it + // looks like a "corrupt repo" error then we blow away the repo and try + // again. If it looks like any other kind of error, or if we've already + // blown away the repository, then we want to return the error as-is. + let mut repo_reinitialized = false; + loop { + debug!("initiating fetch of {:?} from {}", refspecs, url); + let res = repo + .remote_anonymous(url)? 
+ .fetch(&refspecs, Some(&mut opts), None); + let err = match res { + Ok(()) => break, + Err(e) => e, + }; + debug!("fetch failed: {}", err); + + if !repo_reinitialized + && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb) + { + repo_reinitialized = true; + debug!( + "looks like this is a corrupt repository, reinitializing \ + and trying again" + ); + if reinitialize(repo).is_ok() { + continue; + } + } - return Err(err.into()); - } - Ok(()) - }) + return Err(err.into()); + } + Ok(()) + }) + } } fn fetch_with_cli(