From ac37cc5c1ae1f9f8b0010d7f8c9abe8669c393d9 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 19 Nov 2020 15:50:13 -0800 Subject: [PATCH 01/83] Lift make_dep_prefix so it can be shared --- src/cargo/sources/registry/mod.rs | 24 ++++++++++++++++++++++++ src/cargo/sources/registry/remote.rs | 25 +------------------------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 76a5da0089a..4953a0bf10f 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -663,3 +663,27 @@ impl<'cfg> Source for RegistrySource<'cfg> { self.index.is_yanked(pkg, &mut *self.ops) } } + +fn make_dep_prefix(name: &str) -> String { + match name.len() { + 1 => String::from("1"), + 2 => String::from("2"), + 3 => format!("3/{}", &name[..1]), + _ => format!("{}/{}", &name[0..2], &name[2..4]), + } +} + +#[cfg(test)] +mod tests { + use super::make_dep_prefix; + + #[test] + fn dep_prefix() { + assert_eq!(make_dep_prefix("a"), "1"); + assert_eq!(make_dep_prefix("ab"), "2"); + assert_eq!(make_dep_prefix("abc"), "3/a"); + assert_eq!(make_dep_prefix("Abc"), "3/A"); + assert_eq!(make_dep_prefix("AbCd"), "Ab/Cd"); + assert_eq!(make_dep_prefix("aBcDe"), "aB/cD"); + } +} diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 2e44d9ae3ea..e52d38b3756 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,5 +1,6 @@ use crate::core::{GitReference, PackageId, SourceId}; use crate::sources::git; +use crate::sources::registry::make_dep_prefix; use crate::sources::registry::MaybeLock; use crate::sources::registry::{ RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, @@ -20,15 +21,6 @@ use std::mem; use std::path::Path; use std::str; -fn make_dep_prefix(name: &str) -> String { - match name.len() { - 1 => String::from("1"), - 2 => String::from("2"), - 3 => format!("3/{}", 
&name[..1]), - _ => format!("{}/{}", &name[0..2], &name[2..4]), - } -} - pub struct RemoteRegistry<'cfg> { index_path: Filesystem, cache_path: Filesystem, @@ -338,18 +330,3 @@ impl<'cfg> Drop for RemoteRegistry<'cfg> { self.tree.borrow_mut().take(); } } - -#[cfg(test)] -mod tests { - use super::make_dep_prefix; - - #[test] - fn dep_prefix() { - assert_eq!(make_dep_prefix("a"), "1"); - assert_eq!(make_dep_prefix("ab"), "2"); - assert_eq!(make_dep_prefix("abc"), "3/a"); - assert_eq!(make_dep_prefix("Abc"), "3/A"); - assert_eq!(make_dep_prefix("AbCd"), "Ab/Cd"); - assert_eq!(make_dep_prefix("aBcDe"), "aB/cD"); - } -} From c92f34c18e38cc27fd60fde8bdb13ed9398bda53 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 19 Nov 2020 15:51:17 -0800 Subject: [PATCH 02/83] Implement registry HTTP API from RFC This commit implements the HTTP registry API from https://github.com/rust-lang/rfcs/pull/2789. A proof-of-concept server implementation exists at https://github.com/kornelski/cargo-static-registry-rfc-proof-of-concept The client implementation supports the changelog extension in the RFC, but the server does not at the time of writing. This first draft does not take advantage of `Etag` to avoid re-downloads if the server does not support changelog. It also does not download multiple index files concurrently, since the Registry trait doesn't lend itself well to that at the moment. --- src/cargo/core/source/source_id.rs | 33 +- src/cargo/ops/registry.rs | 2 + src/cargo/sources/registry/http_remote.rs | 625 ++++++++++++++++++++++ src/cargo/sources/registry/mod.rs | 11 + 4 files changed, 669 insertions(+), 2 deletions(-) create mode 100644 src/cargo/sources/registry/http_remote.rs diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 5e6322d9e9c..fb431034b42 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -54,6 +54,10 @@ enum SourceKind { LocalRegistry, /// A directory-based registry. 
Directory, + /// A remote registry accessed over HTTP. + /// + /// The protocol is specified by [this RFC](https://github.com/rust-lang/rfcs/pull/2789). + Http, } /// Information to find a specific commit in a Git repository. @@ -135,6 +139,10 @@ impl SourceId { Ok(SourceId::new(SourceKind::Registry, url)? .with_precise(Some("locked".to_string()))) } + "rfc" => { + let url = url.into_url()?; + Ok(SourceId::new(SourceKind::Http, url)?.with_precise(Some("locked".to_string()))) + } "path" => { let url = url.into_url()?; SourceId::new(SourceKind::Path, url) @@ -168,6 +176,11 @@ impl SourceId { SourceId::new(SourceKind::Registry, url.clone()) } + /// Creates a SourceId from a RFC HTTP URL. + pub fn for_http_registry(url: &Url) -> CargoResult { + SourceId::new(SourceKind::Http, url.clone()) + } + /// Creates a SourceId from a local registry path. pub fn for_local_registry(path: &Path) -> CargoResult { let url = path.into_url()?; @@ -241,7 +254,7 @@ impl SourceId { pub fn is_registry(self) -> bool { matches!( self.inner.kind, - SourceKind::Registry | SourceKind::LocalRegistry + SourceKind::Registry | SourceKind::Http | SourceKind::LocalRegistry ) } @@ -250,7 +263,7 @@ impl SourceId { /// "remote" may also mean a file URL to a git index, so it is not /// necessarily "remote". This just means it is not `local-registry`. pub fn is_remote_registry(self) -> bool { - matches!(self.inner.kind, SourceKind::Registry) + matches!(self.inner.kind, SourceKind::Registry | SourceKind::Http) } /// Returns `true` if this source from a Git repository. 
@@ -274,6 +287,11 @@ impl SourceId { }; Ok(Box::new(PathSource::new(&path, self, config))) } + SourceKind::Http => Ok(Box::new(RegistrySource::rfc_http( + self, + yanked_whitelist, + config, + ))), SourceKind::Registry => Ok(Box::new(RegistrySource::remote( self, yanked_whitelist, @@ -390,6 +408,10 @@ impl Ord for SourceId { (SourceKind::Path, _) => return Ordering::Less, (_, SourceKind::Path) => return Ordering::Greater, + (SourceKind::Http, SourceKind::Http) => {} + (SourceKind::Http, _) => return Ordering::Less, + (_, SourceKind::Http) => return Ordering::Greater, + (SourceKind::Registry, SourceKind::Registry) => {} (SourceKind::Registry, _) => return Ordering::Less, (_, SourceKind::Registry) => return Ordering::Greater, @@ -490,6 +512,7 @@ impl fmt::Display for SourceId { Ok(()) } SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)), + SourceKind::Http => write!(f, "http registry `{}`", url_display(&self.inner.url)), SourceKind::Registry => write!(f, "registry `{}`", url_display(&self.inner.url)), SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)), SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)), @@ -536,6 +559,7 @@ impl Hash for SourceId { SourceKind::Registry => 2usize.hash(into), SourceKind::LocalRegistry => 3usize.hash(into), SourceKind::Directory => 4usize.hash(into), + SourceKind::Http => 5usize.hash(into), } match self.inner.kind { SourceKind::Git(_) => self.inner.canonical_url.hash(into), @@ -572,6 +596,11 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> { } Ok(()) } + SourceIdInner { + kind: SourceKind::Http, + ref url, + .. 
+ } => write!(f, "rfc+{}", url), SourceIdInner { kind: SourceKind::Registry, ref url, diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs index a96151fcd5a..a234d4cfc41 100644 --- a/src/cargo/ops/registry.rs +++ b/src/cargo/ops/registry.rs @@ -406,6 +406,7 @@ fn registry( sid ); } + // TODO: this will probably fail for SourceKind::Http at the moment let api_host = { let _lock = config.acquire_package_cache_lock()?; let mut src = RegistrySource::remote(sid, &HashSet::new(), config); @@ -837,6 +838,7 @@ fn get_source_id( ) -> CargoResult { match (reg, index) { (Some(r), _) => SourceId::alt_registry(config, r), + // TODO: this should go through from_url (_, Some(i)) => SourceId::for_registry(&i.into_url()?), _ => { let map = SourceConfigMap::new(config)?; diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs new file mode 100644 index 00000000000..5cd67203dab --- /dev/null +++ b/src/cargo/sources/registry/http_remote.rs @@ -0,0 +1,625 @@ +use crate::core::{PackageId, SourceId}; +use crate::ops; +use crate::sources::registry::make_dep_prefix; +use crate::sources::registry::MaybeLock; +use crate::sources::registry::{ + RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, + VERSION_TEMPLATE, +}; +use crate::util::errors::{CargoResult, CargoResultExt}; +use crate::util::interning::InternedString; +use crate::util::paths; +use crate::util::{Config, Filesystem, Sha256}; +use anyhow::Context; +use curl::easy::Easy; +use log::{debug, trace, warn}; +use std::cell::{Cell, RefCell, RefMut}; +use std::fmt::Write as FmtWrite; +use std::fs::{self, File, OpenOptions}; +use std::io::prelude::*; +use std::io::SeekFrom; +use std::path::Path; +use std::str; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct Version { + epoch: usize, + changelog_offset: usize, +} + +impl std::str::FromStr for Version { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + let mut parts = 
s.split('.'); + let epoch = parts.next().expect("split always yields one item"); + let epoch = usize::from_str_radix(epoch, 10).map_err(|_| "invalid epoch")?; + let changelog_offset = parts.next().ok_or("no changelog offset")?; + let changelog_offset = + usize::from_str_radix(changelog_offset, 10).map_err(|_| "invalid changelog offset")?; + Ok(Version { + epoch, + changelog_offset, + }) + } +} + +impl ToString for Version { + fn to_string(&self) -> String { + format!("{}.{}", self.epoch, self.changelog_offset) + } +} + +// When dynamically linked against libcurl, we want to ignore some failures +// when using old versions that don't support certain features. +// +// NOTE: lifted from src/cargo/core/package.rs +macro_rules! try_old_curl { + ($e:expr, $msg:expr) => { + let result = $e; + if cfg!(target_os = "macos") { + if let Err(e) = result { + warn!("ignoring libcurl {} error: {}", $msg, e); + } + } else { + result.with_context(|| { + anyhow::format_err!("failed to enable {}, is curl not built right?", $msg) + })?; + } + }; +} + +pub struct HttpRegistry<'cfg> { + index_path: Filesystem, + cache_path: Filesystem, + source_id: SourceId, + config: &'cfg Config, + at: Cell>, + checked_for_at: Cell, + http: RefCell>, + // dirty: RefCell> +} + +impl<'cfg> HttpRegistry<'cfg> { + pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> HttpRegistry<'cfg> { + HttpRegistry { + index_path: config.registry_index_path().join(name), + cache_path: config.registry_cache_path().join(name), + source_id, + config, + at: Cell::new(None), + checked_for_at: Cell::new(false), + http: RefCell::new(None), + } + } + + fn filename(&self, pkg: PackageId) -> String { + format!("{}-{}.crate", pkg.name(), pkg.version()) + } + + fn http(&self) -> CargoResult> { + let handle = if let Ok(h) = self.http.try_borrow_mut() { + h + } else { + anyhow::bail!("concurrent index downloads are not yet supported"); + }; + + if handle.is_none() { + assert!(self.config.offline()); + 
anyhow::bail!("can't access remote index: you are in offline mode (--offline)"); + } else { + Ok(RefMut::map(handle, |opt| { + opt.as_mut().expect("!handle.is_none() implies Some") + })) + } + } +} + +const LAST_UPDATED_FILE: &str = ".last-updated"; + +impl<'cfg> RegistryData for HttpRegistry<'cfg> { + fn prepare(&self) -> CargoResult<()> { + if self.at.get().is_none() && !self.checked_for_at.get() { + self.checked_for_at.set(true); + let path = self.config.assert_package_cache_locked(&self.index_path); + if path.exists() { + let version = paths::read(&path.join(LAST_UPDATED_FILE))?; + let version: Version = version + .parse() + .map_err(|e| anyhow::anyhow!("{}", e)) + .chain_err(|| format!("failed to parse last version: '{}'", version))?; + let as_str = InternedString::from(version.to_string()); + self.at.set(Some((version, as_str))); + } + } + + if !self.config.offline() { + let mut http = if let Ok(h) = self.http.try_borrow_mut() { + h + } else { + anyhow::bail!("concurrent index downloads are not yet supported"); + }; + + if http.is_none() { + // Ensure that we'll actually be able to acquire an HTTP handle later on + // once we start trying to download crates. This will weed out any + // problems with `.cargo/config` configuration related to HTTP. + // + // This way if there's a problem the error gets printed before we even + // hit the index, which may not actually read this configuration. + let mut handle = ops::http_handle(&self.config)?; + handle.get(true)?; + handle.follow_location(true)?; + + // TODO: explicitly enable HTTP2? + // https://github.com/rust-lang/cargo/blob/905134577c1955ad7865bcf4b31440d4bc882cde/src/cargo/core/package.rs#L651-L703 + + // This is an option to `libcurl` which indicates that if there's a + // bunch of parallel requests to the same host they all wait until the + // pipelining status of the host is known. This means that we won't + // initiate dozens of connections to crates.io, but rather only one. 
+ // Once the main one is opened we realized that pipelining is possible + // and multiplexing is possible with static.crates.io. All in all this + // reduces the number of connections done to a more manageable state. + // + // NOTE: lifted from src/cargo/core/package.rs + try_old_curl!(handle.pipewait(true), "pipewait"); + *http = Some(handle); + } + } + Ok(()) + } + + fn index_path(&self) -> &Filesystem { + // NOTE: pretty sure this method is unnecessary. + // the only place it is used is to set .path in RegistryIndex, + // which only uses it to call assert_index_locked below... + &self.index_path + } + + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + self.config.assert_package_cache_locked(path) + } + + fn current_version(&self) -> Option { + self.at.get().map(|(_, as_str)| as_str) + } + + fn load( + &self, + root: &Path, + path: &Path, + data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, + ) -> CargoResult<()> { + let pkg = root.join(path); + if pkg.exists() { + return data(&paths::read_bytes(&pkg)?); + } + + let url = self.source_id.url(); + if self.config.offline() { + anyhow::bail!( + "can't download index file from '{}': you are in offline mode (--offline)", + url + ); + } + + self.prepare()?; + let mut handle = self.http()?; + handle.url(&format!("{}{}", url, path.display()))?; + + let mut contents = Vec::new(); + let mut transfer = handle.transfer(); + transfer.write_function(|buf| { + contents.extend_from_slice(buf); + Ok(buf.len()) + })?; + + // TODO: should we display transfer status here somehow? + + transfer + .perform() + .chain_err(|| format!("failed to fetch index file `{}`", path.display()))?; + drop(transfer); + + match handle.response_code()? { + 200 => {} + 404 | 410 | 451 => { + // crate was deleted from the registry. + // nothing to do here since we already deleted the file from the index. + // we just won't populate it again. 
+ anyhow::bail!("crate has been deleted from the registry"); + } + code => { + anyhow::bail!("server returned unexpected HTTP status code {}", code); + } + } + + paths::write(&root.join(path), &contents)?; + data(&contents) + } + + fn config(&mut self) -> CargoResult> { + debug!("loading config"); + self.prepare()?; + self.config.assert_package_cache_locked(&self.index_path); + let mut config = None; + self.load(Path::new(""), Path::new("config.json"), &mut |json| { + config = Some(serde_json::from_slice(json)?); + Ok(()) + })?; + trace!("config loaded"); + Ok(config) + } + + fn update_index(&mut self) -> CargoResult<()> { + if self.config.offline() { + return Ok(()); + } + if self.config.cli_unstable().no_index_update { + return Ok(()); + } + // Make sure the index is only updated once per session since it is an + // expensive operation. This generally only happens when the resolver + // is run multiple times, such as during `cargo publish`. + if self.config.updated_sources().contains(&self.source_id) { + return Ok(()); + } + + debug!("updating the index"); + + self.prepare()?; + let path = self.config.assert_package_cache_locked(&self.index_path); + self.config + .shell() + .status("Updating", self.source_id.display_index())?; + + // Fetch the tail of the changelog. + let url = self.source_id.url(); + // let mut progress = Progress::new("Fetch", config); + // TODO: retry logic? network::with_retry + + enum ChangelogUse { + /// We are fetching the changelog with no historical context. + FirstFetch { full: bool }, + /// We are trying to follow the changelog to update our view of the index. + Follow(Version), + } + + let mut handle = self.http()?; + // TODO: .join? 
may do the wrong thing if url does not end with / + handle.url(&format!("{}/changelog", url))?; + let mut plan = if let Some((version, _)) = self.at.get() { + ChangelogUse::Follow(version) + } else { + ChangelogUse::FirstFetch { full: false } + }; + + let all_dirty = 'changelog: loop { + // reset in case we looped + handle.range("")?; + handle.resume_from(0)?; + + match plan { + ChangelogUse::Follow(version) => { + handle.resume_from(version.changelog_offset as u64)?; + } + ChangelogUse::FirstFetch { full: false } => { + // we really just need the epoch number and file size, + // which we can get at by fetching just the first line. + // "1 2019-10-18 23:51:23 ".len() == 22 + handle.range("0-22")?; + } + ChangelogUse::FirstFetch { full: _ } => {} + } + + let mut contents = Vec::new(); + let mut total_bytes = None; + let mut transfer = handle.transfer(); + transfer.write_function(|buf| { + contents.extend_from_slice(buf); + Ok(buf.len()) + })?; + + transfer.header_function(|buf| { + const CONTENT_RANGE: &'static [u8] = b"Content-Range:"; + if buf.len() > CONTENT_RANGE.len() + && buf[..CONTENT_RANGE.len()].eq_ignore_ascii_case(CONTENT_RANGE) + { + let mut buf = &buf[CONTENT_RANGE.len()..]; + + // trim whitespace + while !buf.is_empty() && buf[0] == b' ' { + buf = &buf[1..]; + } + + // check that the Content-Range unit is indeed bytes + const BYTES_UNIT: &'static [u8] = b"bytes "; + if !buf.starts_with(BYTES_UNIT) { + return true; + } + buf = &buf[BYTES_UNIT.len()..]; + + // extract out the total length (if known) + let rest = buf.splitn(2, |&c| c == b'/'); + if let Some(complete_length) = rest.skip(1 /* byte-range */).next() { + if complete_length.starts_with(b"*") { + // total length isn't known + // this seems weird, but shrug + return true; + } + let complete_length = complete_length + .splitn(2, |&c| c == b' ') + .next() + .expect("split always yields >= 1 element"); + if complete_length.into_iter().all(|c| c.is_ascii_digit()) { + let complete_length = + 
std::str::from_utf8(complete_length).expect("only ascii digits"); + total_bytes = Some( + usize::from_str_radix(complete_length, 10) + .expect("ascii digits make for valid numbers"), + ); + } + } + } + true + })?; + + // TODO: should we show status/progress here? + + transfer + .perform() + .chain_err(|| format!("failed to fetch index changelog from `{}`", url))?; + drop(transfer); + + let mut contents = &contents[..]; + let total_bytes = match handle.response_code()? { + 200 => { + // server does not support Range: + // so we need to manually slice contents + let total_bytes = contents.len(); + if let ChangelogUse::Follow(version) = plan { + if contents.len() < version.changelog_offset { + // must have rolled over. + // luckily, since the server sent the whole response, + // we can just continue as if that was our plan all along. + plan = ChangelogUse::FirstFetch { full: true }; + } else { + contents = &contents[version.changelog_offset..]; + } + } + total_bytes + } + 206 => { + match total_bytes { + None => { + match plan { + ChangelogUse::FirstFetch { full } => { + assert!(!full, "got partial response without Range:"); + + // we need to know the total size of the changelog to know our + // next offset. but, the server didn't give that to us when we + // requested just the first few bytes, so we need to do a full + // request. 
+ plan = ChangelogUse::FirstFetch { full: true }; + continue; + } + ChangelogUse::Follow(version) => { + version.changelog_offset + contents.len() + } + } + } + Some(b) => b, + } + } + 204 => { + // no changes in changelog + break false; + } + 404 => { + // server does not have a changelog + break true; + } + 416 => { + // Range Not Satisfiable + // changelog must have been rolled over + if let ChangelogUse::FirstFetch { full: false } = plan { + // the changelog is _probably_ empty + plan = ChangelogUse::FirstFetch { full: true }; + } else { + plan = ChangelogUse::FirstFetch { full: false }; + } + continue; + } + code => { + anyhow::bail!("server returned unexpected HTTP status code {}", code); + } + }; + + let mut line = String::new(); + while contents.read_line(&mut line)? != 0 { + let mut parts = line.trim().splitn(2, ' '); + let epoch = parts.next().expect("split always has one element"); + if epoch.is_empty() { + // skip empty lines + continue; + } + let epoch = if let Ok(epoch) = epoch.parse::() { + epoch + } else { + warn!("index {} changelog has invalid lines", url); + break 'changelog true; + }; + + let mismatch = match plan { + ChangelogUse::FirstFetch { .. } => true, + ChangelogUse::Follow(ref version) if version.epoch != epoch => { + debug!("index {} changelog has rolled over", url); + // TODO: try previous changelog if available? 
+ true + } + ChangelogUse::Follow(_) => false, + }; + + if mismatch { + debug!( + "index {} is at epoch {} (offset: {})", + url, epoch, total_bytes + ); + + let version = Version { + epoch, + changelog_offset: total_bytes, + }; + let as_str = InternedString::from(version.to_string()); + self.at.set(Some((version, as_str))); + + break 'changelog true; + } + + let rest = if let Some(rest) = parts.next() { + rest + } else { + warn!("index {} changelog has invalid lines", url); + break 'changelog true; + }; + let mut parts = rest.rsplitn(2, ' '); + let krate = parts.next().expect("rsplit always has one element"); + if krate.is_empty() { + warn!("index {} changelog has invalid lines", url); + break 'changelog true; + } + + // remove the index file -- we'll have to re-fetch it + let path = path.join(&Path::new(&make_dep_prefix(krate))).join(krate); + if path.exists() { + paths::remove_file(path)?; + } + } + + match plan { + ChangelogUse::Follow(version) => { + // update version so that index cache won't be used and load will be called + let version = Version { + epoch: version.epoch, + changelog_offset: total_bytes, + }; + let as_str = InternedString::from(version.to_string()); + self.at.set(Some((version, as_str))); + + break false; + } + ChangelogUse::FirstFetch { .. } => { + // we can only get here if the changelog was empty. + // what do we do? we don't know what the current epoch is! + // mark everything as dirty and don't write out a version. + self.at.set(None); + break true; + } + } + }; + + // reset the http handle + handle.range("")?; + handle.resume_from(0)?; + + if all_dirty { + // mark all files in index as dirty + // TODO: this is obviously sub-optimal + paths::remove_dir_all(&path)?; + } + + self.config.updated_sources().insert(self.source_id); + + // Record the latest known state of the index. 
+ if let Some((_, version)) = self.at.get() { + paths::write(&path.join(LAST_UPDATED_FILE), version.as_bytes())?; + } + + Ok(()) + } + + fn download(&mut self, pkg: PackageId, _checksum: &str) -> CargoResult { + let filename = self.filename(pkg); + + // Attempt to open an read-only copy first to avoid an exclusive write + // lock and also work with read-only filesystems. Note that we check the + // length of the file like below to handle interrupted downloads. + // + // If this fails then we fall through to the exclusive path where we may + // have to redownload the file. + let path = self.cache_path.join(&filename); + let path = self.config.assert_package_cache_locked(&path); + if let Ok(dst) = File::open(&path) { + let meta = dst.metadata()?; + if meta.len() > 0 { + return Ok(MaybeLock::Ready(dst)); + } + } + + let config = self.config()?.unwrap(); + let mut url = config.dl; + if !url.contains(CRATE_TEMPLATE) + && !url.contains(VERSION_TEMPLATE) + && !url.contains(PREFIX_TEMPLATE) + && !url.contains(LOWER_PREFIX_TEMPLATE) + { + write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap(); + } + let prefix = make_dep_prefix(&*pkg.name()); + let url = url + .replace(CRATE_TEMPLATE, &*pkg.name()) + .replace(VERSION_TEMPLATE, &pkg.version().to_string()) + .replace(PREFIX_TEMPLATE, &prefix) + .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase()); + + Ok(MaybeLock::Download { + url, + descriptor: pkg.to_string(), + }) + } + + fn finish_download( + &mut self, + pkg: PackageId, + checksum: &str, + data: &[u8], + ) -> CargoResult { + // Verify what we just downloaded + let actual = Sha256::new().update(data).finish_hex(); + if actual != checksum { + anyhow::bail!("failed to verify the checksum of `{}`", pkg) + } + + let filename = self.filename(pkg); + self.cache_path.create_dir()?; + let path = self.cache_path.join(&filename); + let path = self.config.assert_package_cache_locked(&path); + let mut dst = OpenOptions::new() + .create(true) + .read(true) + 
.write(true) + .open(&path) + .chain_err(|| format!("failed to open `{}`", path.display()))?; + let meta = dst.metadata()?; + if meta.len() > 0 { + return Ok(dst); + } + + dst.write_all(data)?; + dst.seek(SeekFrom::Start(0))?; + Ok(dst) + } + + fn is_crate_downloaded(&self, pkg: PackageId) -> bool { + let filename = format!("{}-{}.crate", pkg.name(), pkg.version()); + let path = Path::new(&filename); + + let path = self.cache_path.join(path); + let path = self.config.assert_package_cache_locked(&path); + if let Ok(meta) = fs::metadata(path) { + return meta.len() > 0; + } + false + } +} diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 4953a0bf10f..caaca8c391e 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -396,6 +396,7 @@ pub enum MaybeLock { Download { url: String, descriptor: String }, } +mod http_remote; mod index; mod local; mod remote; @@ -407,6 +408,16 @@ fn short_name(id: SourceId) -> String { } impl<'cfg> RegistrySource<'cfg> { + pub fn rfc_http( + source_id: SourceId, + yanked_whitelist: &HashSet, + config: &'cfg Config, + ) -> RegistrySource<'cfg> { + let name = short_name(source_id); + let ops = http_remote::HttpRegistry::new(source_id, config, &name); + RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) + } + pub fn remote( source_id: SourceId, yanked_whitelist: &HashSet, From 37bd64df91726854465a3d8a90220293b0770e08 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 19 Nov 2020 17:34:52 -0800 Subject: [PATCH 03/83] Improve support for non-changelog HTTP registries With this change, a registry that does not support the optional changelog feature will no longer purge the index on every build. Instead, it will double-check the current file contents with the server for each load, which is fairly efficient if a given index file has _not_ changed. The changelog is (unsurprisingly) still much more efficient. 
--- src/cargo/sources/registry/http_remote.rs | 345 ++++++++++++++++------ 1 file changed, 258 insertions(+), 87 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 5cd67203dab..2f811613a79 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -11,7 +11,7 @@ use crate::util::interning::InternedString; use crate::util::paths; use crate::util::{Config, Filesystem, Sha256}; use anyhow::Context; -use curl::easy::Easy; +use curl::easy::{Easy, List}; use log::{debug, trace, warn}; use std::cell::{Cell, RefCell, RefMut}; use std::fmt::Write as FmtWrite; @@ -22,31 +22,68 @@ use std::path::Path; use std::str; #[derive(Debug, Copy, Clone, PartialEq, Eq)] -struct Version { - epoch: usize, - changelog_offset: usize, +enum ChangelogState { + Unknown, + Unsupported, + Synchronized { + epoch: usize, + changelog_offset: usize, + }, } -impl std::str::FromStr for Version { +impl ChangelogState { + fn is_synchronized(&self) -> bool { + matches!(self, ChangelogState::Synchronized { .. 
}) + } + fn is_unknown(&self) -> bool { + matches!(self, ChangelogState::Unknown) + } + fn is_unsupported(&self) -> bool { + matches!(self, ChangelogState::Unsupported) + } +} + +impl Into<(ChangelogState, InternedString)> for ChangelogState { + fn into(self) -> (ChangelogState, InternedString) { + let is = InternedString::from(self.to_string()); + (self, is) + } +} + +impl std::str::FromStr for ChangelogState { type Err = &'static str; fn from_str(s: &str) -> Result { + if s == "unknown" { + return Ok(ChangelogState::Unknown); + } + if s == "unsupported" { + return Ok(ChangelogState::Unsupported); + } + let mut parts = s.split('.'); let epoch = parts.next().expect("split always yields one item"); let epoch = usize::from_str_radix(epoch, 10).map_err(|_| "invalid epoch")?; let changelog_offset = parts.next().ok_or("no changelog offset")?; let changelog_offset = usize::from_str_radix(changelog_offset, 10).map_err(|_| "invalid changelog offset")?; - Ok(Version { + Ok(ChangelogState::Synchronized { epoch, changelog_offset, }) } } -impl ToString for Version { +impl ToString for ChangelogState { fn to_string(&self) -> String { - format!("{}.{}", self.epoch, self.changelog_offset) + match *self { + ChangelogState::Unknown => String::from("unknown"), + ChangelogState::Unsupported => String::from("unsupported"), + ChangelogState::Synchronized { + epoch, + changelog_offset, + } => format!("{}.{}", epoch, changelog_offset), + } } } @@ -74,7 +111,7 @@ pub struct HttpRegistry<'cfg> { cache_path: Filesystem, source_id: SourceId, config: &'cfg Config, - at: Cell>, + at: Cell<(ChangelogState, InternedString)>, checked_for_at: Cell, http: RefCell>, // dirty: RefCell> @@ -87,7 +124,7 @@ impl<'cfg> HttpRegistry<'cfg> { cache_path: config.registry_cache_path().join(name), source_id, config, - at: Cell::new(None), + at: Cell::new(ChangelogState::Unknown.into()), checked_for_at: Cell::new(false), http: RefCell::new(None), } @@ -119,17 +156,18 @@ const LAST_UPDATED_FILE: &str = 
".last-updated"; impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { - if self.at.get().is_none() && !self.checked_for_at.get() { + if self.at.get().0.is_unknown() && !self.checked_for_at.get() { self.checked_for_at.set(true); let path = self.config.assert_package_cache_locked(&self.index_path); if path.exists() { - let version = paths::read(&path.join(LAST_UPDATED_FILE))?; - let version: Version = version + let cl_state = paths::read(&path.join(LAST_UPDATED_FILE))?; + let cl_state: ChangelogState = cl_state .parse() .map_err(|e| anyhow::anyhow!("{}", e)) - .chain_err(|| format!("failed to parse last version: '{}'", version))?; - let as_str = InternedString::from(version.to_string()); - self.at.set(Some((version, as_str))); + .chain_err(|| { + format!("failed to parse last changelog state: '{}'", cl_state) + })?; + self.at.set(cl_state.into()); } } @@ -182,7 +220,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn current_version(&self) -> Option { - self.at.get().map(|(_, as_str)| as_str) + let cl_state = self.at.get(); + if cl_state.0.is_unknown() { + None + } else { + Some(cl_state.1) + } } fn load( @@ -192,9 +235,33 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()> { let pkg = root.join(path); - if pkg.exists() { - return data(&paths::read_bytes(&pkg)?); - } + let bytes; + let was = if pkg.exists() { + bytes = paths::read_bytes(&pkg)?; + let mut lines = bytes.splitn(3, |&c| c == b'\n'); + let etag = lines.next().expect("splitn always returns >=1 item"); + let last_modified = if let Some(lm) = lines.next() { + lm + } else { + anyhow::bail!("index file is missing HTTP header header"); + }; + let rest = if let Some(rest) = lines.next() { + rest + } else { + anyhow::bail!("index file is missing HTTP header header"); + }; + + if !self.at.get().0.is_unsupported() || self.config.offline() { + return data(rest); + } else { + // we cannot trust the index 
files -- need to check with server + let etag = std::str::from_utf8(etag)?; + let last_modified = std::str::from_utf8(last_modified)?; + Some((etag, last_modified)) + } + } else { + None + }; let url = self.source_id.url(); if self.config.offline() { @@ -208,13 +275,58 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let mut handle = self.http()?; handle.url(&format!("{}{}", url, path.display()))?; + if let Some((etag, last_modified)) = was { + let mut list = List::new(); + list.append(&format!("If-None-Match: {}", etag))?; + list.append(&format!("If-Modified-Since: {}", last_modified))?; + handle.http_headers(list)?; + } + let mut contents = Vec::new(); + let mut etag = None; + let mut last_modified = None; let mut transfer = handle.transfer(); transfer.write_function(|buf| { contents.extend_from_slice(buf); Ok(buf.len()) })?; + // capture ETag and Last-Modified + transfer.header_function(|buf| { + const ETAG: &'static [u8] = b"ETag:"; + const LAST_MODIFIED: &'static [u8] = b"Last-Modified:"; + + let (tag, buf) = + if buf.len() >= ETAG.len() && buf[..ETAG.len()].eq_ignore_ascii_case(ETAG) { + (ETAG, &buf[ETAG.len()..]) + } else if buf.len() >= LAST_MODIFIED.len() + && buf[..LAST_MODIFIED.len()].eq_ignore_ascii_case(LAST_MODIFIED) + { + (LAST_MODIFIED, &buf[LAST_MODIFIED.len()..]) + } else { + return true; + }; + + // don't let server sneak more lines into index file + if buf.contains(&b'\n') { + return true; + } + + if let Ok(buf) = std::str::from_utf8(buf) { + let buf = buf.trim(); + let mut s = String::with_capacity(buf.len() + 1); + s.push_str(buf); + s.push('\n'); + if tag == ETAG { + etag = Some(s); + } else if tag == LAST_MODIFIED { + last_modified = Some(s); + } + } + + true + })?; + // TODO: should we display transfer status here somehow? 
transfer @@ -222,8 +334,17 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .chain_err(|| format!("failed to fetch index file `{}`", path.display()))?; drop(transfer); + // don't send If-Modified-Since with future requests + let mut list = List::new(); + list.append("If-Modified-Since:")?; + handle.http_headers(list)?; + match handle.response_code()? { 200 => {} + 304 => { + // not modified + assert!(was.is_some()); + } 404 | 410 | 451 => { // crate was deleted from the registry. // nothing to do here since we already deleted the file from the index. @@ -235,7 +356,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } - paths::write(&root.join(path), &contents)?; + let mut file = paths::create(&root.join(path))?; + file.write_all(etag.as_deref().unwrap_or("\n").as_bytes())?; + file.write_all(last_modified.as_deref().unwrap_or("\n").as_bytes())?; + file.write_all(&contents)?; + file.flush()?; data(&contents) } @@ -283,26 +408,38 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { /// We are fetching the changelog with no historical context. FirstFetch { full: bool }, /// We are trying to follow the changelog to update our view of the index. - Follow(Version), + Follow { + epoch: usize, + changelog_offset: usize, + }, } let mut handle = self.http()?; // TODO: .join? may do the wrong thing if url does not end with / handle.url(&format!("{}/changelog", url))?; - let mut plan = if let Some((version, _)) = self.at.get() { - ChangelogUse::Follow(version) + let mut plan = if let ChangelogState::Synchronized { + epoch, + changelog_offset, + } = self.at.get().0 + { + ChangelogUse::Follow { + epoch, + changelog_offset, + } } else { ChangelogUse::FirstFetch { full: false } }; - let all_dirty = 'changelog: loop { + 'changelog: loop { // reset in case we looped handle.range("")?; handle.resume_from(0)?; match plan { - ChangelogUse::Follow(version) => { - handle.resume_from(version.changelog_offset as u64)?; + ChangelogUse::Follow { + changelog_offset, .. 
+ } => { + handle.resume_from(changelog_offset as u64)?; } ChangelogUse::FirstFetch { full: false } => { // we really just need the epoch number and file size, @@ -378,14 +515,21 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // server does not support Range: // so we need to manually slice contents let total_bytes = contents.len(); - if let ChangelogUse::Follow(version) = plan { - if contents.len() < version.changelog_offset { + if let ChangelogUse::Follow { + changelog_offset, .. + } = plan + { + if contents.len() < changelog_offset { // must have rolled over. // luckily, since the server sent the whole response, // we can just continue as if that was our plan all along. plan = ChangelogUse::FirstFetch { full: true }; } else { - contents = &contents[version.changelog_offset..]; + contents = &contents[changelog_offset..]; + if contents.is_empty() { + // no changes in changelog + break; + } } } total_bytes @@ -404,9 +548,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { plan = ChangelogUse::FirstFetch { full: true }; continue; } - ChangelogUse::Follow(version) => { - version.changelog_offset + contents.len() - } + ChangelogUse::Follow { + changelog_offset, .. + } => changelog_offset + contents.len(), } } Some(b) => b, @@ -414,11 +558,18 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } 204 => { // no changes in changelog - break false; + assert!(self.at.get().0.is_synchronized()); + break; } 404 => { // server does not have a changelog - break true; + if self.at.get().0.is_synchronized() { + // we used to have a changelog, but now we don't. it's important that we + // record that fact so that later calls to load() will all double-check + // with the server. 
+ self.at.set(ChangelogState::Unsupported.into()); + } + break; } 416 => { // Range Not Satisfiable @@ -437,6 +588,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; let mut line = String::new(); + let mut new_changelog = false; + let mut fetched_epoch = None; while contents.read_line(&mut line)? != 0 { let mut parts = line.trim().splitn(2, ' '); let epoch = parts.next().expect("split always has one element"); @@ -445,49 +598,50 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { continue; } let epoch = if let Ok(epoch) = epoch.parse::() { + fetched_epoch = Some(epoch); epoch } else { warn!("index {} changelog has invalid lines", url); - break 'changelog true; + // ensure that all future index fetches check with server + self.at.set(ChangelogState::Unsupported.into()); + break 'changelog; }; - let mismatch = match plan { - ChangelogUse::FirstFetch { .. } => true, - ChangelogUse::Follow(ref version) if version.epoch != epoch => { + match plan { + ChangelogUse::FirstFetch { .. } => { + new_changelog = true; + + // we don't actually care about the remainder of the changelog, + // since we've completely purged our local index. + break; + } + ChangelogUse::Follow { + epoch: last_epoch, .. + } if last_epoch != epoch => { debug!("index {} changelog has rolled over", url); // TODO: try previous changelog if available? - true - } - ChangelogUse::Follow(_) => false, - }; - - if mismatch { - debug!( - "index {} is at epoch {} (offset: {})", - url, epoch, total_bytes - ); - - let version = Version { - epoch, - changelog_offset: total_bytes, - }; - let as_str = InternedString::from(version.to_string()); - self.at.set(Some((version, as_str))); - break 'changelog true; + new_changelog = true; + break; + } + ChangelogUse::Follow { .. 
} => {} } let rest = if let Some(rest) = parts.next() { rest } else { warn!("index {} changelog has invalid lines", url); - break 'changelog true; + // ensure that all future index fetches check with server + self.at.set(ChangelogState::Unsupported.into()); + break 'changelog; }; let mut parts = rest.rsplitn(2, ' '); let krate = parts.next().expect("rsplit always has one element"); if krate.is_empty() { warn!("index {} changelog has invalid lines", url); - break 'changelog true; + // ensure that all future index fetches check with server + self.at.set(ChangelogState::Unsupported.into()); + break 'changelog; } // remove the index file -- we'll have to re-fetch it @@ -497,44 +651,61 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } - match plan { - ChangelogUse::Follow(version) => { - // update version so that index cache won't be used and load will be called - let version = Version { - epoch: version.epoch, - changelog_offset: total_bytes, - }; - let as_str = InternedString::from(version.to_string()); - self.at.set(Some((version, as_str))); - - break false; - } - ChangelogUse::FirstFetch { .. } => { - // we can only get here if the changelog was empty. - // what do we do? we don't know what the current epoch is! - // mark everything as dirty and don't write out a version. - self.at.set(None); - break true; + if total_bytes == 0 { + // the changelog has rolled over, but we didn't realize since we didn't actually + // _observe_ another epoch number. catch that here. + new_changelog = true; + } + + if new_changelog { + if let Some(epoch) = fetched_epoch { + debug!( + "index {} is at epoch {} (offset: {})", + url, epoch, total_bytes + ); + + // we don't know which index entries are now invalid and which are not. + // so we purge them all. + // XXX: will this cause issues with directory locking? 
+ paths::remove_dir_all(&path)?; + paths::create_dir_all(&path)?; + + // but from this point forward we're synchronized + self.at.set( + ChangelogState::Synchronized { + epoch, + changelog_offset: total_bytes, + } + .into(), + ); + } else { + // we have a new changelog, but we don't know what the epoch of that changelog + // is since it was empty (otherwise fetched_epoch would be Some). + self.at.set(ChangelogState::Unknown.into()); } + break; } - }; + + // keep track of our new byte offset in the changelog + let epoch = fetched_epoch.expect("changelog was non-empty (total_bytes != 0)"); + self.at.set( + ChangelogState::Synchronized { + epoch, + changelog_offset: total_bytes, + } + .into(), + ); + break; + } // reset the http handle handle.range("")?; handle.resume_from(0)?; - if all_dirty { - // mark all files in index as dirty - // TODO: this is obviously sub-optimal - paths::remove_dir_all(&path)?; - } - self.config.updated_sources().insert(self.source_id); // Record the latest known state of the index. 
- if let Some((_, version)) = self.at.get() { - paths::write(&path.join(LAST_UPDATED_FILE), version.as_bytes())?; - } + paths::write(&path.join(LAST_UPDATED_FILE), self.at.get().1.as_bytes())?; Ok(()) } From f04eee160050bdab153395bb5fa5775b7142831e Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 12:37:12 -0800 Subject: [PATCH 04/83] Share try_old_curl --- src/cargo/core/package.rs | 20 +------------------- src/cargo/macros.rs | 18 ++++++++++++++++++ src/cargo/sources/registry/http_remote.rs | 20 -------------------- 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs index d0a4f847b31..1d541858085 100644 --- a/src/cargo/core/package.rs +++ b/src/cargo/core/package.rs @@ -8,12 +8,11 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use std::time::{Duration, Instant}; -use anyhow::Context; use bytesize::ByteSize; use curl::easy::{Easy, HttpVersion}; use curl::multi::{EasyHandle, Multi}; use lazycell::LazyCell; -use log::{debug, warn}; +use log::debug; use semver::Version; use serde::Serialize; @@ -579,23 +578,6 @@ impl<'cfg> PackageSet<'cfg> { } } -// When dynamically linked against libcurl, we want to ignore some failures -// when using old versions that don't support certain features. -macro_rules! try_old_curl { - ($e:expr, $msg:expr) => { - let result = $e; - if cfg!(target_os = "macos") { - if let Err(e) = result { - warn!("ignoring libcurl {} error: {}", $msg, e); - } - } else { - result.with_context(|| { - anyhow::format_err!("failed to enable {}, is curl not built right?", $msg) - })?; - } - }; -} - impl<'a, 'cfg> Downloads<'a, 'cfg> { /// Starts to download the package for the `id` specified. 
/// diff --git a/src/cargo/macros.rs b/src/cargo/macros.rs index 3ebf3b37f67..763b0b4979a 100644 --- a/src/cargo/macros.rs +++ b/src/cargo/macros.rs @@ -47,3 +47,21 @@ impl fmt::Debug for DisplayAsDebug { fmt::Display::fmt(&self.0, f) } } + +// When dynamically linked against libcurl, we want to ignore some failures +// when using old versions that don't support certain features. +macro_rules! try_old_curl { + ($e:expr, $msg:expr) => { + let result = $e; + if cfg!(target_os = "macos") { + if let Err(e) = result { + log::warn!("ignoring libcurl {} error: {}", $msg, e); + } + } else { + use anyhow::Context; + result.with_context(|| { + anyhow::format_err!("failed to enable {}, is curl not built right?", $msg) + })?; + } + }; +} diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 2f811613a79..37e098dd9a4 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -10,7 +10,6 @@ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; use crate::util::{Config, Filesystem, Sha256}; -use anyhow::Context; use curl::easy::{Easy, List}; use log::{debug, trace, warn}; use std::cell::{Cell, RefCell, RefMut}; @@ -87,25 +86,6 @@ impl ToString for ChangelogState { } } -// When dynamically linked against libcurl, we want to ignore some failures -// when using old versions that don't support certain features. -// -// NOTE: lifted from src/cargo/core/package.rs -macro_rules! 
try_old_curl { - ($e:expr, $msg:expr) => { - let result = $e; - if cfg!(target_os = "macos") { - if let Err(e) = result { - warn!("ignoring libcurl {} error: {}", $msg, e); - } - } else { - result.with_context(|| { - anyhow::format_err!("failed to enable {}, is curl not built right?", $msg) - })?; - } - }; -} - pub struct HttpRegistry<'cfg> { index_path: Filesystem, cache_path: Filesystem, From 6881d6763d25d2e86929b61e8ceb1981f760b853 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 12:37:24 -0800 Subject: [PATCH 05/83] Tidy up significantly --- src/cargo/sources/registry/http_remote.rs | 545 +++++++++++++++------- 1 file changed, 378 insertions(+), 167 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 37e098dd9a4..1c204d813c3 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1,3 +1,7 @@ +//! Access to a HTTP-based crate registry. +//! +//! See [`HttpRegistry`] for details. + use crate::core::{PackageId, SourceId}; use crate::ops; use crate::sources::registry::make_dep_prefix; @@ -21,12 +25,34 @@ use std::path::Path; use std::str; #[derive(Debug, Copy, Clone, PartialEq, Eq)] +/// The last known state of the changelog. enum ChangelogState { + /// The changelog is in an unknown state. + /// + /// This can be because we've never fetched it before, or because it was empty last time we + /// looked (so it did not contain an `epoch`). Unknown, + + /// The server does not host a changelog. + /// + /// In this state, we must double-check with the server every time we want to load an index + /// file in case that file has changed upstream. + // TODO: we may need each Unsupported to have a distinct string representation to bust caches? Unsupported, + + /// The server served us a changelog in the past. Synchronized { + /// The last known changelog epoch (see the RFC). 
+ /// + /// The epoch allows the server to start the changelog over for garbage-collection purposes + /// in a way that the client can detect. epoch: usize, - changelog_offset: usize, + + /// The last known length of the changelog (in bytes). + /// + /// This is used to efficiently fetch only the suffix of the changelog that has been + /// appended since we last read it. + length: usize, }, } @@ -63,13 +89,9 @@ impl std::str::FromStr for ChangelogState { let mut parts = s.split('.'); let epoch = parts.next().expect("split always yields one item"); let epoch = usize::from_str_radix(epoch, 10).map_err(|_| "invalid epoch")?; - let changelog_offset = parts.next().ok_or("no changelog offset")?; - let changelog_offset = - usize::from_str_radix(changelog_offset, 10).map_err(|_| "invalid changelog offset")?; - Ok(ChangelogState::Synchronized { - epoch, - changelog_offset, - }) + let length = parts.next().ok_or("no changelog offset")?; + let length = usize::from_str_radix(length, 10).map_err(|_| "invalid changelog offset")?; + Ok(ChangelogState::Synchronized { epoch, length }) } } @@ -78,14 +100,31 @@ impl ToString for ChangelogState { match *self { ChangelogState::Unknown => String::from("unknown"), ChangelogState::Unsupported => String::from("unsupported"), - ChangelogState::Synchronized { - epoch, - changelog_offset, - } => format!("{}.{}", epoch, changelog_offset), + ChangelogState::Synchronized { epoch, length } => format!("{}.{}", epoch, length), } } } +/// A registry served by the HTTP-based registry API. +/// +/// This type is primarily accessed through the [`RegistryData`] trait. +/// +/// `HttpRegistry` implements the HTTP-based registry API outlined in [RFC XXX]. Read the RFC for +/// the complete protocol, but _roughly_ the implementation loads each index file (e.g., +/// config.json or re/ge/regex) from an HTTP service rather than from a locally cloned git +/// repository. 
The remote service can more or less be a static file server that simply serves the
+/// contents of the origin git repository.
+///
+/// Implemented naively, this leads to a significant amount of network traffic, as a lookup of any
+/// index file would need to check with the remote backend if the index file has changed. This
+/// cost is somewhat mitigated by the use of HTTP conditional fetches (`If-Modified-Since` and
+/// `If-None-Match` for `ETag`s) which can be efficiently handled by HTTP/2, but it's still not
+/// ideal. The RFC therefore also introduces the (optional) notion of a _changelog_. The changelog
+/// is a dedicated append-only file on the server that lists every crate index change. This allows
+/// the client to fetch the changelog, invalidate its locally cached index files for only the
+/// changed crates, and then not worry about double-checking with the server for each index file.
+///
+/// [RFC XXX]: https://github.com/rust-lang/rfcs/pull/2789
 pub struct HttpRegistry<'cfg> {
     index_path: Filesystem,
     cache_path: Filesystem,
@@ -94,7 +133,6 @@ pub struct HttpRegistry<'cfg> {
     at: Cell<(ChangelogState, InternedString)>,
     checked_for_at: Cell,
     http: RefCell>,
-    // dirty: RefCell>
 }

 impl<'cfg> HttpRegistry<'cfg> {
@@ -136,6 +174,7 @@ const LAST_UPDATED_FILE: &str = ".last-updated";

 impl<'cfg> RegistryData for HttpRegistry<'cfg> {
     fn prepare(&self) -> CargoResult<()> {
+        // Load last known changelog state from LAST_UPDATED_FILE.
         if self.at.get().0.is_unknown() && !self.checked_for_at.get() {
             self.checked_for_at.set(true);
             let path = self.config.assert_package_cache_locked(&self.index_path);
@@ -159,6 +198,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
         };

         if http.is_none() {
+            // NOTE: lifted from src/cargo/core/package.rs
+            //
             // Ensure that we'll actually be able to acquire an HTTP handle later on
             // once we start trying to download crates. This will weed out any
             // problems with `.cargo/config` configuration related to HTTP.
@@ -172,6 +213,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // TODO: explicitly enable HTTP2? // https://github.com/rust-lang/cargo/blob/905134577c1955ad7865bcf4b31440d4bc882cde/src/cargo/core/package.rs#L651-L703 + // NOTE: lifted from src/cargo/core/package.rs + // // This is an option to `libcurl` which indicates that if there's a // bunch of parallel requests to the same host they all wait until the // pipelining status of the host is known. This means that we won't @@ -179,8 +222,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Once the main one is opened we realized that pipelining is possible // and multiplexing is possible with static.crates.io. All in all this // reduces the number of connections done to a more manageable state. - // - // NOTE: lifted from src/cargo/core/package.rs try_old_curl!(handle.pipewait(true), "pipewait"); *http = Some(handle); } @@ -189,9 +230,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn index_path(&self) -> &Filesystem { - // NOTE: pretty sure this method is unnecessary. - // the only place it is used is to set .path in RegistryIndex, - // which only uses it to call assert_index_locked below... + // NOTE: I'm pretty sure this method is unnecessary. + // The only place it is used is to set `.path` in `RegistryIndex`, + // which only uses it to call `assert_index_locked below`... &self.index_path } @@ -214,9 +255,28 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()> { + // A quick overview of what goes on below: + // + // We first check if we have a local copy of the given index file. + // + // If we do, and the server has a changelog, then we know that the index file is up to + // date (as of when we last checked the changelog), so there's no need to double-check with + // the server that the file isn't stale. We can just return its contents directly. 
If we + // _need_ a newer version of it, `update_index` will be called and then `load` will be + // called again. + // + // If we do, but the server does not have a changelog, we need to check with the server if + // the index file has changed upstream. We do this using a conditional HTTP request using + // the `Last-Modified` and `ETag` headers we got when we fetched the currently cached index + // file (those headers are stored in the first two lines of each index file). That way, if + // nothing has changed (likely the common case), the server doesn't have to send us + // any data, just a 304 Not Modified. + // + // If we don't have a local copy of the index file, we need to fetch it from the server. let pkg = root.join(path); let bytes; let was = if pkg.exists() { + // We have a local copy -- extract the `Last-Modified` and `Etag` headers. bytes = paths::read_bytes(&pkg)?; let mut lines = bytes.splitn(3, |&c| c == b'\n'); let etag = lines.next().expect("splitn always returns >=1 item"); @@ -231,13 +291,17 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!("index file is missing HTTP header header"); }; - if !self.at.get().0.is_unsupported() || self.config.offline() { + // NOTE: We should always double-check for changes to config.json. + let double_check = self.at.get().0.is_unsupported() || path.ends_with("config.json"); + + // NOTE: If we're in offline mode, we don't double-check with the server. + if !double_check || self.config.offline() { return data(rest); } else { - // we cannot trust the index files -- need to check with server + // We cannot trust the index files and need to double-check with server. 
let etag = std::str::from_utf8(etag)?; let last_modified = std::str::from_utf8(last_modified)?; - Some((etag, last_modified)) + Some((etag, last_modified, rest)) } } else { None @@ -255,7 +319,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let mut handle = self.http()?; handle.url(&format!("{}{}", url, path.display()))?; - if let Some((etag, last_modified)) = was { + if let Some((ref etag, ref last_modified, _)) = was { let mut list = List::new(); list.append(&format!("If-None-Match: {}", etag))?; list.append(&format!("If-Modified-Since: {}", last_modified))?; @@ -271,7 +335,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { Ok(buf.len()) })?; - // capture ETag and Last-Modified + // Capture ETag and Last-Modified. transfer.header_function(|buf| { const ETAG: &'static [u8] = b"ETag:"; const LAST_MODIFIED: &'static [u8] = b"Last-Modified:"; @@ -287,13 +351,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { return true; }; - // don't let server sneak more lines into index file + // Don't let server sneak more lines into index file. if buf.contains(&b'\n') { return true; } if let Ok(buf) = std::str::from_utf8(buf) { let buf = buf.trim(); + // Append a new line to each so we can easily prepend to the index file. let mut s = String::with_capacity(buf.len() + 1); s.push_str(buf); s.push('\n'); @@ -307,28 +372,34 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { true })?; - // TODO: should we display transfer status here somehow? + // TODO: Should we display transfer status here somehow? transfer .perform() .chain_err(|| format!("failed to fetch index file `{}`", path.display()))?; drop(transfer); - // don't send If-Modified-Since with future requests + // Avoid the same conditional headers being sent in future re-uses of the `Easy` client. let mut list = List::new(); list.append("If-Modified-Since:")?; + list.append("If-None-Match:")?; handle.http_headers(list)?; match handle.response_code()? 
{ 200 => {} 304 => { - // not modified - assert!(was.is_some()); + // Not Modified response. + let (_, _, bytes) = + was.expect("conditional request response implies we have local index file"); + return data(bytes); } 404 | 410 | 451 => { - // crate was deleted from the registry. - // nothing to do here since we already deleted the file from the index. - // we just won't populate it again. + // The crate was deleted from the registry. + if was.is_some() { + // Make sure we delete the local index file. + debug!("crate {} was deleted from the registry", path.display()); + paths::remove_file(&pkg)?; + } anyhow::bail!("crate has been deleted from the registry"); } code => { @@ -371,6 +442,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { return Ok(()); } + // NOTE: We check for the changelog even if the server did not previously have a changelog + // in case it has wisened up since then. + debug!("updating the index"); self.prepare()?; @@ -379,55 +453,43 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .shell() .status("Updating", self.source_id.display_index())?; - // Fetch the tail of the changelog. let url = self.source_id.url(); - // let mut progress = Progress::new("Fetch", config); - // TODO: retry logic? network::with_retry + let mut handle = self.http()?; + handle.url(&format!("{}/changelog", url))?; + + // TODO: Retry logic using network::with_retry? - enum ChangelogUse { + /// How are we attempting to fetch the changelog? + #[derive(Debug, Copy, Clone)] + enum ChangelogStrategy { /// We are fetching the changelog with no historical context. FirstFetch { full: bool }, /// We are trying to follow the changelog to update our view of the index. - Follow { - epoch: usize, - changelog_offset: usize, - }, + Follow { epoch: usize, length: usize }, } - - let mut handle = self.http()?; - // TODO: .join? 
may do the wrong thing if url does not end with /
-        handle.url(&format!("{}/changelog", url))?;
-        let mut plan = if let ChangelogState::Synchronized {
-            epoch,
-            changelog_offset,
-        } = self.at.get().0
-        {
-            ChangelogUse::Follow {
-                epoch,
-                changelog_offset,
-            }
+        let mut plan = if let ChangelogState::Synchronized { epoch, length } = self.at.get().0 {
+            ChangelogStrategy::Follow { epoch, length }
         } else {
-            ChangelogUse::FirstFetch { full: false }
+            ChangelogStrategy::FirstFetch { full: false }
         };
+        // NOTE: Loop in case of rollover, in which case we need to fetch it starting at byte 0.
         'changelog: loop {
-            // reset in case we looped
+            // Reset in case we looped.
             handle.range("")?;
             handle.resume_from(0)?;

             match plan {
-                ChangelogUse::Follow {
-                    changelog_offset, ..
-                } => {
-                    handle.resume_from(changelog_offset as u64)?;
+                ChangelogStrategy::Follow { length, .. } => {
+                    handle.resume_from(length as u64)?;
                 }
-                ChangelogUse::FirstFetch { full: false } => {
-                    // we really just need the epoch number and file size,
+                ChangelogStrategy::FirstFetch { full: false } => {
+                    // We really just need the epoch number and file size,
                     // which we can get at by fetching just the first line.
                     // "1 2019-10-18 23:51:23 ".len() == 22
                     handle.range("0-22")?;
                 }
-                ChangelogUse::FirstFetch { full: _ } => {}
+                ChangelogStrategy::FirstFetch { full: _ } => {}
             }

             let mut contents = Vec::new();
@@ -438,6 +500,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
                 Ok(buf.len())
             })?;

+            // Extract `Content-Range` header to learn the total size of the changelog.
+            //
+            // We need the total size from `Content-Range` since we only fetch a very small subset
+            // of the changelog when we first access the server (just enough to get the epoch).
transfer.header_function(|buf| { const CONTENT_RANGE: &'static [u8] = b"Content-Range:"; if buf.len() > CONTENT_RANGE.len() @@ -445,24 +511,25 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { { let mut buf = &buf[CONTENT_RANGE.len()..]; - // trim whitespace + // Trim leading whitespace. while !buf.is_empty() && buf[0] == b' ' { buf = &buf[1..]; } - // check that the Content-Range unit is indeed bytes + // Check that the Content-Range unit is indeed bytes. const BYTES_UNIT: &'static [u8] = b"bytes "; if !buf.starts_with(BYTES_UNIT) { return true; } buf = &buf[BYTES_UNIT.len()..]; - // extract out the total length (if known) + // Extract out the total length. let rest = buf.splitn(2, |&c| c == b'/'); if let Some(complete_length) = rest.skip(1 /* byte-range */).next() { if complete_length.starts_with(b"*") { - // total length isn't known - // this seems weird, but shrug + // The server does not know the total size of the changelog. + // This seems weird, but not much we can do about it. + // We'll end up falling back to a full fetch. return true; } let complete_length = complete_length @@ -482,7 +549,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { true })?; - // TODO: should we show status/progress here? + // TODO: Should we show progress here somehow? transfer .perform() @@ -492,59 +559,49 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let mut contents = &contents[..]; let total_bytes = match handle.response_code()? { 200 => { - // server does not support Range: - // so we need to manually slice contents + // The server does not support Range: requests, + // so we need to manually slice the bytes we got back. let total_bytes = contents.len(); - if let ChangelogUse::Follow { - changelog_offset, .. - } = plan - { - if contents.len() < changelog_offset { - // must have rolled over. - // luckily, since the server sent the whole response, + if let ChangelogStrategy::Follow { length, .. 
} = plan { + if contents.len() < length || contents.len() == 0 { + // The changelog must have rolled over. + // Luckily, since the server sent the whole response, // we can just continue as if that was our plan all along. - plan = ChangelogUse::FirstFetch { full: true }; + plan = ChangelogStrategy::FirstFetch { full: true }; } else { - contents = &contents[changelog_offset..]; - if contents.is_empty() { - // no changes in changelog - break; - } + contents = &contents[length..]; } } total_bytes } 206 => { + // 206 Partial Content -- this is what we expect to get. match total_bytes { None => { + // The server sent us back only the byte range we asked for, + // but it did not inform us of the total size of the changelog. + // This is fine if we're just following the changelog, since we can + // compute the total size (old size + size of content), but if we're + // trying to _start_ following the changelog, we need to know its + // current size to know where to fetch from next time! match plan { - ChangelogUse::FirstFetch { full } => { + ChangelogStrategy::FirstFetch { full } => { assert!(!full, "got partial response without Range:"); - // we need to know the total size of the changelog to know our - // next offset. but, the server didn't give that to us when we - // requested just the first few bytes, so we need to do a full - // request. - plan = ChangelogUse::FirstFetch { full: true }; + // Our only recourse is to fetch the full changelog. + plan = ChangelogStrategy::FirstFetch { full: true }; continue; } - ChangelogUse::Follow { - changelog_offset, .. - } => changelog_offset + contents.len(), + ChangelogStrategy::Follow { length, .. } => length + contents.len(), } } Some(b) => b, } } - 204 => { - // no changes in changelog - assert!(self.at.get().0.is_synchronized()); - break; - } 404 => { - // server does not have a changelog + // The server does not have a changelog. if self.at.get().0.is_synchronized() { - // we used to have a changelog, but now we don't. 
it's important that we + // We used to have a changelog, but now we don't. It's important that we // record that fact so that later calls to load() will all double-check // with the server. self.at.set(ChangelogState::Unsupported.into()); @@ -552,133 +609,287 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { break; } 416 => { - // Range Not Satisfiable - // changelog must have been rolled over - if let ChangelogUse::FirstFetch { full: false } = plan { - // the changelog is _probably_ empty - plan = ChangelogUse::FirstFetch { full: true }; - } else { - plan = ChangelogUse::FirstFetch { full: false }; + // 416 Range Not Satisfiable + // + // This can mean one of two things: + // + // 1. The changelog has rolled over, so we requested too much data. + // 2. There are no new entries (our request goes beyond the end of the + // changelog). + // + // If we hit case 1, we need to fetch the start of the new changelog instead. + // If we hit case 2, what we'd like to do is, well, nothing. + match (plan, total_bytes) { + (ChangelogStrategy::Follow { length, .. }, Some(total_bytes)) + if length == total_bytes => + { + contents = &[]; + total_bytes + } + // We must assume we're in case 1. + (ChangelogStrategy::FirstFetch { full }, _) => { + // Our request for just the start of the changelog (Range: 0-22) failed. + // This probably means that the changelog is empty, but we do a full fetch + // to make sure. + assert!(!full); + plan = ChangelogStrategy::FirstFetch { full: true }; + continue; + } + (ChangelogStrategy::Follow { .. }, _) => { + // We requested a byte range past the end of the changelog, which + // implies that it must have rolled over (and shrunk). + plan = ChangelogStrategy::FirstFetch { full: false }; + continue; + } } - continue; } code => { anyhow::bail!("server returned unexpected HTTP status code {}", code); } }; + if contents.len() == 0 { + if total_bytes == 0 { + // We can't use the changelog, since we don't know its epoch. 
+                    self.at.set(ChangelogState::Unknown.into());
+                } else {
+                    // There are no changes in changelog, so there's supposedly nothing to update.
+                    //
+                    // TODO: This isn't fool-proof. It _could_ be that the changelog rolled over,
+                    // and just so happens to be exactly the same length as the old changelog was
+                    // last time we checked it. This is quite unlikely, but not impossible. To fix
+                    // this, we should keep track of ETag + Last-Modified, and check that here. If
+                    // they do not match, then fall back to a ::FirstFetch.
+                }
+                break;
+            }
+
+            enum WhatLine {
+                First,
+                Second { first_failed: bool },
+                Later,
+            }
+            let mut at = WhatLine::First;
+
             let mut line = String::new();
             let mut new_changelog = false;
             let mut fetched_epoch = None;
             while contents.read_line(&mut line)? != 0 {
-                let mut parts = line.trim().splitn(2, ' ');
+                // First, make sure that the line is a _complete_ line.
+                // It's possible that the changelog rolled over, _but_ our old range was still
+                // valid. In that case, the returned content may not start at a line boundary, and
+                // parsing will fail in weird ways. Or worse yet, succeed but with an incorrect
+                // epoch number! Should that happen, we need to detect it.
+                //
+                // Lines _should_ look like this:
+                // 1 2019-10-18 23:52:00 anyhow
+                //
+                // That is: epoch date time crate.
+                let mut parts = line.trim().split_whitespace();
                 let epoch = parts.next().expect("split always has one element");
-                if epoch.is_empty() {
-                    // skip empty lines
-                    continue;
-                }
+                let krate = parts.skip(2).next();
+
                 let epoch = if let Ok(epoch) = epoch.parse::<usize>() {
                     fetched_epoch = Some(epoch);
                     epoch
+                } else if let WhatLine::First = at {
+                    // The line is clearly not valid.
+                    //
+                    // This means the changelog rolled over. Unfortunately, the byte range we
+                    // requested does not contain the epoch, so we don't have enough information to
+                    // move forwards. We need to parse one more line.
+ + // If we got here during a first fetch (which fetches starting at byte 0), the + // server's changelog is entirely bad. + if let ChangelogStrategy::FirstFetch { .. } = plan { + warn!("server changelog does not begin with an epoch"); + // Ensure that all future index fetches check with server + self.at.set(ChangelogState::Unsupported.into()); + break 'changelog; + } + + debug!( + "index {} changelog has invalid first line; assuming rollover", + url + ); + at = WhatLine::Second { first_failed: true }; + continue; } else { warn!("index {} changelog has invalid lines", url); - // ensure that all future index fetches check with server + // Ensure that all future index fetches check with server self.at.set(ChangelogState::Unsupported.into()); break 'changelog; }; match plan { - ChangelogUse::FirstFetch { .. } => { - new_changelog = true; + ChangelogStrategy::FirstFetch { .. } => { + // This requested bytes starting at 0, so the epoch we parsed out is valid. - // we don't actually care about the remainder of the changelog, + // We don't actually care about the remainder of the changelog, // since we've completely purged our local index. + new_changelog = true; + at = WhatLine::Later; break; } - ChangelogUse::Follow { + ChangelogStrategy::Follow { epoch: last_epoch, .. } if last_epoch != epoch => { + // There has clearly been a rollover, though we have to be a little + // careful. Since we requested a particular byte offset, the parsed epoch + // may not actually have been the "true" epoch. Imagine that we fetched: + // + // 1 2019-10-18 23:52:00 anyhow + // + // it _could_ be that that's just an unfortunate slice of this line: + // + // 21 2019-10-18 23:52:00 anyhow + // + // So, we need to parse a second line to ensure we have the _true_ line. + if let WhatLine::First = at { + at = WhatLine::Second { first_failed: true }; + continue; + } + debug!("index {} changelog has rolled over", url); - // TODO: try previous changelog if available? 
+ // TODO: Try previous changelog if available? + // https://github.com/rust-lang/rfcs/pull/2789#issuecomment-730024821 + + // We're starting over with this new, rolled-over changelog, so we don't + // care about its contents. new_changelog = true; + at = WhatLine::Later; break; } - ChangelogUse::Follow { .. } => {} + ChangelogStrategy::Follow { .. } => {} } - let rest = if let Some(rest) = parts.next() { - rest + at = match at { + WhatLine::First => WhatLine::Second { + first_failed: false, + }, + WhatLine::Second { first_failed: true } => { + // If the first line failed to parse, that must mean there was a rollover. + // If we get here, that means that we're in ::Follow mode, but that the + // next line had an epoch that _did_ match our own epoch, which would imply + // there _wasn't_ a rollover. Something is _very_ wrong. + unreachable!("server response byte offset mismatch"); + } + WhatLine::Second { first_failed: _ } | WhatLine::Later => WhatLine::Later, + }; + + let krate = if let Some(krate) = krate { + krate } else { - warn!("index {} changelog has invalid lines", url); - // ensure that all future index fetches check with server + warn!("index {} changelog has an invalid line: {}", url, line); + + // We could error out here, but it's always safe for us to ignore the changelog + // and just double-check all index file loads instead, so we prefer that. self.at.set(ChangelogState::Unsupported.into()); break 'changelog; }; - let mut parts = rest.rsplitn(2, ' '); - let krate = parts.next().expect("rsplit always has one element"); + if krate.is_empty() { - warn!("index {} changelog has invalid lines", url); - // ensure that all future index fetches check with server + warn!("index {} changelog has an invalid line: {}", url, line); + + // Same as above -- prefer working to failing. 
self.at.set(ChangelogState::Unsupported.into()); break 'changelog; } - // remove the index file -- we'll have to re-fetch it + // Remove the outdated index file -- we'll have to re-fetch it let path = path.join(&Path::new(&make_dep_prefix(krate))).join(krate); if path.exists() { paths::remove_file(path)?; } } - if total_bytes == 0 { - // the changelog has rolled over, but we didn't realize since we didn't actually - // _observe_ another epoch number. catch that here. - new_changelog = true; + if let WhatLine::Second { first_failed } = at { + let (epoch, length) = if let ChangelogStrategy::Follow { epoch, length } = plan { + (epoch, length) + } else { + unreachable!("::FirstFetch always breaks on the first line"); + }; + + if first_failed { + // The changelog must have rolled over. This means that whatever we got in + // `fetched_epoch` may not be valid due to weird byte offsets. Unfortunately, + // we never got a second line to ensure we parsed a complete epoch either! Our + // only option here is to do another request to the server for the start of the + // changelog. + plan = ChangelogStrategy::FirstFetch { full: false }; + continue; + } + + // There is a _slight_ chance that there was a rollover, and that the + // byte offset we provided happened to be valid, and happened to perfectly + // align so that the string starts with a number that just so happens to be + // the same as the old epoch. That's... weird, but possible. + // + // Basically, imagine that the previous epoch we knew about was 3, and the first + // (and only) line we got in the changelog diff we requested was: + // + // 3 2019-10-18 23:52:00 anyhow + // + // All good, right? Well, not _quite_. + // What if that is just a weird slicing of this line: + // + // 13 2019-10-18 23:52:00 anyhow + // + // And since there was no second line, we never saw epoch 13, and just kept going + // as if everything is fine. 
To make absolutely sure, we do another fetch of the + // changelog that includes some earlier data as well. That fetch should get more + // than one line, and so detect any such epoch shenanigans. + plan = ChangelogStrategy::Follow { + epoch, + // How far back we go here isn't super important. We just have to make sure we + // go at least one line back, so that the response will include at least two + // lines. The longer back we go, the more index entries we will unnecessarily + // invalidate. If we don't go far enough, we'll just end up in this clause + // again and do another round trip to go further back. + length: length.saturating_sub(16), + }; + continue; } + let epoch = + fetched_epoch.expect("changelog was non-empty, and epoch parsing didn't fail"); + if new_changelog { - if let Some(epoch) = fetched_epoch { - debug!( - "index {} is at epoch {} (offset: {})", - url, epoch, total_bytes - ); + debug!( + "index {} is at epoch {} (offset: {})", + url, epoch, total_bytes + ); - // we don't know which index entries are now invalid and which are not. - // so we purge them all. - // XXX: will this cause issues with directory locking? - paths::remove_dir_all(&path)?; - paths::create_dir_all(&path)?; - - // but from this point forward we're synchronized - self.at.set( - ChangelogState::Synchronized { - epoch, - changelog_offset: total_bytes, - } - .into(), - ); - } else { - // we have a new changelog, but we don't know what the epoch of that changelog - // is since it was empty (otherwise fetched_epoch would be Some). - self.at.set(ChangelogState::Unknown.into()); - } - break; + // We don't know which index entries are now invalid and which are not, + // so we have to purge them all. + // + // TODO: Will this cause issues with directory locking? + paths::remove_dir_all(&path)?; + paths::create_dir_all(&path)?; + + // From this point forward, we're synchronized with the changelog! 
+ self.at.set( + ChangelogState::Synchronized { + epoch, + length: total_bytes, + } + .into(), + ); + } else { + // Keep track of our new byte offset into the changelog. + self.at.set( + ChangelogState::Synchronized { + epoch, + length: total_bytes, + } + .into(), + ); } - - // keep track of our new byte offset in the changelog - let epoch = fetched_epoch.expect("changelog was non-empty (total_bytes != 0)"); - self.at.set( - ChangelogState::Synchronized { - epoch, - changelog_offset: total_bytes, - } - .into(), - ); break; } - // reset the http handle + // Reset the http handle for later requests that re-use the Easy. handle.range("")?; handle.resume_from(0)?; From c2b7ec77512591ff3317b46369f89ec31d40d8d1 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:08:34 -0800 Subject: [PATCH 06/83] Detect rfc+http as HTTP registry --- src/cargo/sources/config.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/config.rs b/src/cargo/sources/config.rs index 71a9a7194c0..ce7ef6c58da 100644 --- a/src/cargo/sources/config.rs +++ b/src/cargo/sources/config.rs @@ -207,7 +207,18 @@ restore the source replacement configuration to continue the build let mut srcs = Vec::new(); if let Some(registry) = def.registry { let url = url(®istry, &format!("source.{}.registry", name))?; - srcs.push(SourceId::for_registry(&url)?); + if url.scheme().starts_with("rfc+") { + // NOTE: it is illegal to use set_scheme to change rfc+http(s) to http(s). 
+ let url = url + .to_string() + .strip_prefix("rfc+") + .unwrap() + .into_url() + .unwrap(); + srcs.push(SourceId::for_http_registry(&url)?); + } else { + srcs.push(SourceId::for_registry(&url)?); + } } if let Some(local_registry) = def.local_registry { let path = local_registry.resolve_path(self.config); From e67ea7944a0e70c14ade50f364eb879a47a0352e Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:09:01 -0800 Subject: [PATCH 07/83] Double-check anything that's not synchronized --- src/cargo/sources/registry/http_remote.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 1c204d813c3..e91dbc02544 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -63,9 +63,6 @@ impl ChangelogState { fn is_unknown(&self) -> bool { matches!(self, ChangelogState::Unknown) } - fn is_unsupported(&self) -> bool { - matches!(self, ChangelogState::Unsupported) - } } impl Into<(ChangelogState, InternedString)> for ChangelogState { @@ -292,7 +289,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; // NOTE: We should always double-check for changes to config.json. - let double_check = self.at.get().0.is_unsupported() || path.ends_with("config.json"); + let double_check = !self.at.get().0.is_synchronized() || path.ends_with("config.json"); // NOTE: If we're in offline mode, we don't double-check with the server. 
if !double_check || self.config.offline() { From 4c1af6740167d784698ffb4c925889b3d959f1ac Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:09:28 -0800 Subject: [PATCH 08/83] Add more debug output --- src/cargo/sources/registry/http_remote.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index e91dbc02544..f465d09ca4a 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -274,6 +274,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let bytes; let was = if pkg.exists() { // We have a local copy -- extract the `Last-Modified` and `Etag` headers. + trace!("load {} from disk", path.display()); + bytes = paths::read_bytes(&pkg)?; let mut lines = bytes.splitn(3, |&c| c == b'\n'); let etag = lines.next().expect("splitn always returns >=1 item"); @@ -291,6 +293,22 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // NOTE: We should always double-check for changes to config.json. let double_check = !self.at.get().0.is_synchronized() || path.ends_with("config.json"); + if double_check { + if self.config.offline() { + debug!( + "not double-checking freshness of {} due to offline", + path.display() + ); + } else { + debug!("double-checking freshness of {}", path.display()); + } + } else { + debug!( + "using {} from cache as changelog is synchronized", + path.display() + ); + } + // NOTE: If we're in offline mode, we don't double-check with the server. 
if !double_check || self.config.offline() { return data(rest); @@ -314,6 +332,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { self.prepare()?; let mut handle = self.http()?; + debug!("fetch {}{}", url, path.display()); handle.url(&format!("{}{}", url, path.display()))?; if let Some((ref etag, ref last_modified, _)) = was { @@ -382,6 +401,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { list.append("If-None-Match:")?; handle.http_headers(list)?; + debug!( + "index file downloaded with status code {}", + handle.response_code()? + ); match handle.response_code()? { 200 => {} 304 => { From 011e483119ad534c133e829537c9bf7f9b5a14c9 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:10:08 -0800 Subject: [PATCH 09/83] Handle directories that don't exist --- src/cargo/sources/registry/http_remote.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index f465d09ca4a..8b4f244f6ce 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -427,6 +427,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } + paths::create_dir_all(pkg.parent().expect("pkg is a file"))?; let mut file = paths::create(&root.join(path))?; file.write_all(etag.as_deref().unwrap_or("\n").as_bytes())?; file.write_all(last_modified.as_deref().unwrap_or("\n").as_bytes())?; @@ -885,8 +886,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // so we have to purge them all. // // TODO: Will this cause issues with directory locking? - paths::remove_dir_all(&path)?; - paths::create_dir_all(&path)?; + if path.exists() { + paths::remove_dir_all(&path)?; + paths::create_dir_all(&path)?; + } // From this point forward, we're synchronized with the changelog! 
self.at.set( @@ -916,7 +919,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { self.config.updated_sources().insert(self.source_id); // Record the latest known state of the index. - paths::write(&path.join(LAST_UPDATED_FILE), self.at.get().1.as_bytes())?; + if !path.exists() { + paths::create_dir_all(&path)?; + } + let mut file = paths::create(&path.join(LAST_UPDATED_FILE))?; + file.write_all(self.at.get().1.as_bytes())?; + file.flush()?; Ok(()) } From ff86ee2c541c530bff67963d1958da2776b56dbc Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:10:17 -0800 Subject: [PATCH 10/83] Allow empty lines in changelog --- src/cargo/sources/registry/http_remote.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 8b4f244f6ce..d733c2fe558 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -710,6 +710,19 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let epoch = parts.next().expect("split always has one element"); let krate = parts.skip(2).next(); + if epoch.is_empty() { + // Skip empty lines. + + // We _have_ observed a line change though, + // so the next epoch read is guaranteed to read a complete epoch. 
+                    if let WhatLine::First = at {
+                        at = WhatLine::Second {
+                            first_failed: false,
+                        };
+                    }
+                    continue;
+                }
+
                 let epoch = if let Ok(epoch) = epoch.parse::<usize>() {
                     fetched_epoch = Some(epoch);
                     epoch

From 2220c1900fdf466a1de4a44b262f7f17b39cc32f Mon Sep 17 00:00:00 2001
From: Jon Gjengset
Date: Fri, 20 Nov 2020 17:10:32 -0800
Subject: [PATCH 11/83] A note about non-Range servers

---
 src/cargo/sources/registry/http_remote.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs
index d733c2fe558..0848bdafa60 100644
--- a/src/cargo/sources/registry/http_remote.rs
+++ b/src/cargo/sources/registry/http_remote.rs
@@ -582,6 +582,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
             200 => {
                 // The server does not support Range: requests,
                 // so we need to manually slice the bytes we got back.
+                //
+                // TODO: This is a really bad operating state! We're fetching the _entire_
+                // changelog each time we update the changelog. Not clear if that's better than
+                // just validating each index lookup?
                 let total_bytes = contents.len();
                 if let ChangelogStrategy::Follow { length, ..
} = plan { if contents.len() < length || contents.len() == 0 { From 3ec6f870cfcfb063ca472faa589bb02e150d6efb Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:11:02 -0800 Subject: [PATCH 12/83] Add rudimentary testing for HTTP registry --- crates/cargo-test-support/src/registry.rs | 249 ++++++++++++++++++++++ tests/testsuite/http_registry.rs | 241 +++++++++++++++++++++ tests/testsuite/main.rs | 1 + 3 files changed, 491 insertions(+) create mode 100644 tests/testsuite/http_registry.rs diff --git a/crates/cargo-test-support/src/registry.rs b/crates/cargo-test-support/src/registry.rs index 4bb6f2aa43b..ea3c3011230 100644 --- a/crates/cargo-test-support/src/registry.rs +++ b/crates/cargo-test-support/src/registry.rs @@ -7,7 +7,12 @@ use flate2::Compression; use std::collections::HashMap; use std::fs::{self, File}; use std::io::prelude::*; +use std::io::BufReader; +use std::net::{SocketAddr, TcpListener}; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::thread; use tar::{Builder, Header}; use url::Url; @@ -213,6 +218,230 @@ pub fn init() { ); } +pub enum RegistryServerConfiguration { + NoChangelog, + WithChangelog, + ChangelogNoRange, +} + +pub struct RegistryServer { + done: Arc, + server: Option>, + addr: SocketAddr, +} + +impl RegistryServer { + pub fn addr(&self) -> SocketAddr { + self.addr + } +} + +impl Drop for RegistryServer { + fn drop(&mut self) { + self.done.store(true, Ordering::SeqCst); + // NOTE: we can't actually await the server since it's blocked in accept() + let _ = self.server.take().unwrap(); + } +} + +#[must_use] +pub fn serve_registry( + registry_path: PathBuf, + config: RegistryServerConfiguration, +) -> RegistryServer { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let addr = listener.local_addr().unwrap(); + let done = Arc::new(AtomicBool::new(false)); + let done2 = done.clone(); + + let t = thread::spawn(move || { + let support_range = 
!matches!(config, RegistryServerConfiguration::ChangelogNoRange);
+
+        let mut line = String::new();
+        'server: while !done2.load(Ordering::SeqCst) {
+            let (socket, _) = listener.accept().unwrap();
+            // Let's implement a very naive static file HTTP server.
+            let mut buf = BufReader::new(socket);
+
+            // First, the request line:
+            // GET /path HTTPVERSION
+            line.clear();
+            if buf.read_line(&mut line).unwrap() == 0 {
+                // Connection terminated.
+                continue;
+            }
+
+            assert!(line.starts_with("GET "), "got non-GET request: {}", line);
+            let path = PathBuf::from(
+                line.split_whitespace()
+                    .skip(1)
+                    .next()
+                    .unwrap()
+                    .trim_start_matches('/'),
+            );
+
+            let file = registry_path.join(path);
+            let mut exists = file.exists();
+            if file.ends_with("changelog")
+                && matches!(config, RegistryServerConfiguration::NoChangelog)
+            {
+                exists = false;
+            }
+
+            if exists {
+                // Grab some other headers we may care about.
+                let mut range = None;
+                let mut if_modified_since = None;
+                let mut if_none_match = None;
+                loop {
+                    line.clear();
+                    if buf.read_line(&mut line).unwrap() == 0 {
+                        continue 'server;
+                    }
+
+                    if line == "\r\n" {
+                        // End of headers.
+                        line.clear();
+                        break;
+                    }
+
+                    let value = line
+                        .splitn(2, ':')
+                        .skip(1)
+                        .next()
+                        .map(|v| v.trim())
+                        .unwrap();
+
+                    if line.starts_with("Range:") {
+                        let value = value.strip_prefix("bytes=").unwrap_or(value);
+                        if !value.is_empty() {
+                            let mut parts = value.split('-');
+                            let start = parts.next().unwrap().parse::<usize>().unwrap();
+                            let end = parts.next().unwrap();
+                            let end = if end.is_empty() {
+                                None
+                            } else {
+                                Some(end.parse::<usize>().unwrap())
+                            };
+                            range = Some((start, end));
+                        }
+                    } else if line.starts_with("If-Modified-Since:") {
+                        if_modified_since = Some(value.to_owned());
+                    } else if line.starts_with("If-None-Match:") {
+                        if_none_match = Some(value.trim_matches('"').to_owned());
+                    }
+                }
+
+                // Now grab info about the file.
+ let data = fs::read(&file).unwrap(); + let etag = Sha256::new().update(&data).finish_hex(); + let last_modified = format!("{:?}", file.metadata().unwrap().modified().unwrap()); + + // Start to construct our response: + let mut any_match = false; + let mut all_match = true; + if let Some(expected) = if_none_match { + if etag != expected { + all_match = false; + } else { + any_match = true; + } + } + if let Some(expected) = if_modified_since { + // NOTE: Equality comparison is good enough for tests. + if last_modified != expected { + all_match = false; + } else { + any_match = true; + } + } + if any_match { + assert!(range.is_none()); + } + + // Write out the main response line. + let data_len = data.len(); + let mut data = &data[..]; + if any_match && all_match { + buf.get_mut() + .write_all(b"HTTP/1.1 304 Not Modified\r\n") + .unwrap(); + } else if range.is_none() || !support_range { + buf.get_mut().write_all(b"HTTP/1.1 200 OK\r\n").unwrap(); + } else if let Some((start, end)) = range { + if start >= data.len() + || end.unwrap_or(0) >= data.len() + || end.unwrap_or(start) <= start + { + buf.get_mut() + .write_all(b"HTTP/1.1 416 Range Not Satisfiable\r\n") + .unwrap(); + } else { + buf.get_mut() + .write_all(b"HTTP/1.1 206 Partial Content\r\n") + .unwrap(); + + // Slice the data as requested and include a header indicating that. + // Note that start and end are both inclusive! + data = &data[start..=end.unwrap_or(data_len - 1)]; + buf.get_mut() + .write_all( + format!( + "Content-Range: bytes {}-{}/{}\r\n", + start, + end.unwrap_or(data_len - 1), + data_len + ) + .as_bytes(), + ) + .unwrap(); + } + } + // TODO: Support 451 for crate index deletions. + + // Write out other headers. 
+ buf.get_mut() + .write_all(format!("Content-Length: {}\r\n", data.len()).as_bytes()) + .unwrap(); + buf.get_mut() + .write_all(format!("ETag: \"{}\"\r\n", etag).as_bytes()) + .unwrap(); + buf.get_mut() + .write_all(format!("Last-Modified: {}\r\n", last_modified).as_bytes()) + .unwrap(); + + // And finally, write out the body. + buf.get_mut().write_all(b"\r\n").unwrap(); + buf.get_mut().write_all(data).unwrap(); + } else { + loop { + line.clear(); + if buf.read_line(&mut line).unwrap() == 0 { + // Connection terminated. + continue 'server; + } + + if line == "\r\n" { + break; + } + } + + buf.get_mut() + .write_all(b"HTTP/1.1 404 Not Found\r\n\r\n") + .unwrap(); + buf.get_mut().write_all(b"\r\n").unwrap(); + } + buf.get_mut().flush().unwrap(); + } + }); + + RegistryServer { + addr, + server: Some(t), + done, + } +} + pub fn init_registry(registry_path: PathBuf, dl_url: String, api_url: Url, api_path: PathBuf) { // Initialize a new registry. repo(®istry_path) @@ -454,6 +683,26 @@ impl Package { t!(fs::create_dir_all(dst.parent().unwrap())); t!(fs::write(&dst, prev + &line[..] + "\n")); + // Update changelog. + let dst = registry_path.join("changelog"); + t!(fs::create_dir_all(dst.parent().unwrap())); + let mut epoch = 1; + if dst.exists() { + // Fish out the current epoch. + let prev = fs::read_to_string(&dst).unwrap_or_default(); + let e = prev.split_whitespace().next().unwrap(); + if !e.is_empty() { + epoch = e.parse::().unwrap(); + } + } + let mut changelog = t!(fs::OpenOptions::new().append(true).create(true).open(dst)); + t!(writeln!( + changelog, + "{} 2020-11-20 16:54:07 {}", + epoch, name + )); + t!(changelog.flush()); + // Add the new file to the index. if !self.local { let repo = t!(git2::Repository::open(®istry_path)); diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs new file mode 100644 index 00000000000..fead720714e --- /dev/null +++ b/tests/testsuite/http_registry.rs @@ -0,0 +1,241 @@ +//! 
Tests for HTTP registry sources. + +use cargo_test_support::paths; +use cargo_test_support::registry::{ + registry_path, serve_registry, Package, RegistryServer, RegistryServerConfiguration, +}; +use cargo_test_support::{project, t}; +use std::fs; + +fn setup(config: RegistryServerConfiguration) -> RegistryServer { + let server = serve_registry(registry_path(), config); + + let root = paths::root(); + t!(fs::create_dir(&root.join(".cargo"))); + t!(fs::write( + root.join(".cargo/config"), + format!( + " + [source.crates-io] + registry = 'https://wut' + replace-with = 'my-awesome-http-registry' + + [source.my-awesome-http-registry] + registry = 'rfc+http://{}' + ", + server.addr() + ) + )); + + server +} + +fn simple(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.0.1").publish(); + + p.cargo("build") + .with_stderr(&format!( + "\ +[UPDATING] `{reg}` index +[DOWNLOADING] crates ... 
+[DOWNLOADED] bar v0.0.1 (http registry `{reg}`) +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); + + p.cargo("clean").run(); + + // Don't download a second time + p.cargo("build") + .with_stderr( + "\ +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + ) + .run(); +} + +#[cargo_test] +fn no_changelog_simple() { + simple(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_simple() { + simple(RegistryServerConfiguration::WithChangelog); +} + +fn deps(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("baz", "0.0.1").publish(); + Package::new("bar", "0.0.1").dep("baz", "*").publish(); + + p.cargo("build") + .with_stderr(&format!( + "\ +[UPDATING] `{reg}` index +[DOWNLOADING] crates ... +[DOWNLOADED] [..] v0.0.1 (http registry `{reg}`) +[DOWNLOADED] [..] 
v0.0.1 (http registry `{reg}`) +[COMPILING] baz v0.0.1 +[COMPILING] bar v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); +} + +#[cargo_test] +fn no_changelog_deps() { + deps(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_deps() { + deps(RegistryServerConfiguration::WithChangelog); +} + +fn nonexistent(config: RegistryServerConfiguration) { + let _server = setup(config); + Package::new("init", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + nonexistent = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + p.cargo("build") + .with_status(101) + .with_stderr( + "\ +[UPDATING] [..] index +error: no matching package named `nonexistent` found +location searched: registry [..] +required by package `foo v0.0.1 ([..])` +", + ) + .run(); +} + +#[cargo_test] +fn no_changelog_nonexistent() { + nonexistent(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_nonexistent() { + nonexistent(RegistryServerConfiguration::WithChangelog); +} + +fn update_registry(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); + Package::new("init", "0.0.1").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + notyet = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + p.cargo("build") + .with_status(101) + .with_stderr_contains( + "\ +error: no matching package named `notyet` found +location searched: registry `[..]` +required by package `foo v0.0.1 ([..])` +", + ) + .run(); + + Package::new("notyet", "0.0.1").publish(); + + p.cargo("build") + .with_stderr(format!( + "\ +[UPDATING] `{reg}` index +[DOWNLOADING] crates ... 
+[DOWNLOADED] notyet v0.0.1 (http registry `{reg}`) +[COMPILING] notyet v0.0.1 +[COMPILING] foo v0.0.1 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); +} + +#[cargo_test] +fn no_changelog_update_registry() { + update_registry(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_update_registry() { + update_registry(RegistryServerConfiguration::WithChangelog); +} diff --git a/tests/testsuite/main.rs b/tests/testsuite/main.rs index 80d3d860c2d..e7eeb6dadba 100644 --- a/tests/testsuite/main.rs +++ b/tests/testsuite/main.rs @@ -56,6 +56,7 @@ mod git_auth; mod git_gc; mod glob_targets; mod help; +mod http_registry; mod init; mod install; mod install_upgrade; From 184fa413a32f99f0cbba929db7412c35167ccbde Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:30:37 -0800 Subject: [PATCH 13/83] No changelog means no version means no cache --- src/cargo/sources/registry/http_remote.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 0848bdafa60..b695458e354 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -37,7 +37,6 @@ enum ChangelogState { /// /// In this state, we must double-check with the server every time we want to load an index /// file in case that file has changed upstream. - // TODO: we may need each Unsupported to have a distinct string representation to bust caches? Unsupported, /// The server served us a changelog in the past. 
@@ -239,10 +238,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn current_version(&self) -> Option { let cl_state = self.at.get(); - if cl_state.0.is_unknown() { - None - } else { + if cl_state.0.is_synchronized() { Some(cl_state.1) + } else { + None } } From 9527f00203718349bc2b0aac4bdbb9ee9471493f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:31:00 -0800 Subject: [PATCH 14/83] Unify unknown and unsupported changelog states They're handled the same anyway. --- src/cargo/sources/registry/http_remote.rs | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index b695458e354..1dd642d9bf7 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -27,12 +27,6 @@ use std::str; #[derive(Debug, Copy, Clone, PartialEq, Eq)] /// The last known state of the changelog. enum ChangelogState { - /// The changelog is in an unknown state. - /// - /// This can be because we've never fetched it before, or because it was empty last time we - /// looked (so it did not contain an `epoch`). - Unknown, - /// The server does not host a changelog. /// /// In this state, we must double-check with the server every time we want to load an index @@ -59,9 +53,6 @@ impl ChangelogState { fn is_synchronized(&self) -> bool { matches!(self, ChangelogState::Synchronized { .. 
}) } - fn is_unknown(&self) -> bool { - matches!(self, ChangelogState::Unknown) - } } impl Into<(ChangelogState, InternedString)> for ChangelogState { @@ -75,9 +66,6 @@ impl std::str::FromStr for ChangelogState { type Err = &'static str; fn from_str(s: &str) -> Result { - if s == "unknown" { - return Ok(ChangelogState::Unknown); - } if s == "unsupported" { return Ok(ChangelogState::Unsupported); } @@ -94,7 +82,6 @@ impl std::str::FromStr for ChangelogState { impl ToString for ChangelogState { fn to_string(&self) -> String { match *self { - ChangelogState::Unknown => String::from("unknown"), ChangelogState::Unsupported => String::from("unsupported"), ChangelogState::Synchronized { epoch, length } => format!("{}.{}", epoch, length), } @@ -138,7 +125,7 @@ impl<'cfg> HttpRegistry<'cfg> { cache_path: config.registry_cache_path().join(name), source_id, config, - at: Cell::new(ChangelogState::Unknown.into()), + at: Cell::new(ChangelogState::Unsupported.into()), checked_for_at: Cell::new(false), http: RefCell::new(None), } @@ -171,7 +158,7 @@ const LAST_UPDATED_FILE: &str = ".last-updated"; impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { // Load last known changelog state from LAST_UPDATED_FILE. - if self.at.get().0.is_unknown() && !self.checked_for_at.get() { + if !self.checked_for_at.get() { self.checked_for_at.set(true); let path = self.config.assert_package_cache_locked(&self.index_path); if path.exists() { @@ -675,7 +662,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if contents.len() == 0 { if total_bytes == 0 { // We can't use the changelog, since we don't know its epoch. - self.at.set(ChangelogState::Unknown.into()); + self.at.set(ChangelogState::Unsupported.into()); } else { // There are no changes in changelog, so there's supposedly nothing to update. 
// From 572ceebca6f3fab4630a74f4abf93075569cbf35 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 20 Nov 2020 17:31:25 -0800 Subject: [PATCH 15/83] More tests for HTTP registry --- crates/cargo-test-support/src/registry.rs | 1 + tests/testsuite/http_registry.rs | 167 +++++++++++++++++++++- 2 files changed, 167 insertions(+), 1 deletion(-) diff --git a/crates/cargo-test-support/src/registry.rs b/crates/cargo-test-support/src/registry.rs index ea3c3011230..5e35c207041 100644 --- a/crates/cargo-test-support/src/registry.rs +++ b/crates/cargo-test-support/src/registry.rs @@ -218,6 +218,7 @@ pub fn init() { ); } +#[derive(Debug, Copy, Clone)] pub enum RegistryServerConfiguration { NoChangelog, WithChangelog, diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index fead720714e..63099872722 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -1,6 +1,6 @@ //! Tests for HTTP registry sources. -use cargo_test_support::paths; +use cargo_test_support::paths::{self, CargoPathExt}; use cargo_test_support::registry::{ registry_path, serve_registry, Package, RegistryServer, RegistryServerConfiguration, }; @@ -239,3 +239,168 @@ fn no_changelog_update_registry() { fn changelog_update_registry() { update_registry(RegistryServerConfiguration::WithChangelog); } + +fn update_publish_then_update(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); + + // First generate a Cargo.lock and a clone of the registry index at the + // "head" of the current registry. + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + Package::new("a", "0.1.0").publish(); + p.cargo("build").run(); + + // Next, publish a new package and back up the copy of the registry we just + // created. 
+ Package::new("a", "0.1.1").publish(); + let registry = paths::home().join(".cargo/registry"); + let backup = paths::root().join("registry-backup"); + t!(fs::rename(®istry, &backup)); + + // Generate a Cargo.lock with the newer version, and then move the old copy + // of the registry back into place. + let p2 = project() + .at("foo2") + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + p2.cargo("build").run(); + registry.rm_rf(); + t!(fs::rename(&backup, ®istry)); + t!(fs::rename( + p2.root().join("Cargo.lock"), + p.root().join("Cargo.lock") + )); + + // Finally, build the first project again (with our newer Cargo.lock) which + // should force an update of the old registry, download the new crate, and + // then build everything again. + // + // However, if the server does not support a changelog, the index file will be double-checked + // with the backend when it is loaded, and will be updated at that time. There is no index + // update. + let updating = if matches!(config, RegistryServerConfiguration::NoChangelog) { + "" + } else { + "[UPDATING] [..]\n" + }; + p.cargo("build") + .with_stderr(format!( + "{u}\ +[DOWNLOADING] crates ... 
+[DOWNLOADED] a v0.1.1 (http registry `{reg}`) +[COMPILING] a v0.1.1 +[COMPILING] foo v0.5.0 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + u = updating, + reg = url + )) + .run(); +} + +#[cargo_test] +fn no_changelog_update_publish_then_update() { + update_publish_then_update(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_update_publish_then_update() { + update_publish_then_update(RegistryServerConfiguration::WithChangelog); +} + +fn update_multiple_packages(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "*" + b = "*" + c = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("a", "0.1.0").publish(); + Package::new("b", "0.1.0").publish(); + Package::new("c", "0.1.0").publish(); + + p.cargo("fetch").run(); + + Package::new("a", "0.1.1").publish(); + Package::new("b", "0.1.1").publish(); + Package::new("c", "0.1.1").publish(); + + p.cargo("update -pa -pb") + .with_stderr( + "\ +[UPDATING] `[..]` index +[UPDATING] a v0.1.0 -> v0.1.1 +[UPDATING] b v0.1.0 -> v0.1.1 +", + ) + .run(); + + p.cargo("update -pb -pc") + .with_stderr( + "\ +[UPDATING] `[..]` index +[UPDATING] c v0.1.0 -> v0.1.1 +", + ) + .run(); + + p.cargo("build") + .with_stderr_contains(format!("[DOWNLOADED] a v0.1.1 (http registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] b v0.1.1 (http registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] c v0.1.1 (http registry `{}`)", url)) + .with_stderr_contains("[COMPILING] a v0.1.1") + .with_stderr_contains("[COMPILING] b v0.1.1") + .with_stderr_contains("[COMPILING] c v0.1.1") + .with_stderr_contains("[COMPILING] foo v0.5.0 ([..])") + .run(); +} + +#[cargo_test] +fn no_changelog_update_multiple_packages() { + 
update_multiple_packages(RegistryServerConfiguration::NoChangelog); +} + +#[cargo_test] +fn changelog_update_multiple_packages() { + update_multiple_packages(RegistryServerConfiguration::WithChangelog); +} From d9aed8a39820f7abbb76858d7229452454c99274 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 23 Nov 2020 11:18:44 -0800 Subject: [PATCH 16/83] Don't write JSON-encoded name to changelog --- crates/cargo-test-support/src/registry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cargo-test-support/src/registry.rs b/crates/cargo-test-support/src/registry.rs index 5e35c207041..0555524c1b2 100644 --- a/crates/cargo-test-support/src/registry.rs +++ b/crates/cargo-test-support/src/registry.rs @@ -700,7 +700,7 @@ impl Package { t!(writeln!( changelog, "{} 2020-11-20 16:54:07 {}", - epoch, name + epoch, self.name )); t!(changelog.flush()); From 45dbbb70a1eec00157c9728e31063039069638e1 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 23 Nov 2020 11:18:56 -0800 Subject: [PATCH 17/83] All headers end with CRLF --- src/cargo/sources/registry/http_remote.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 1dd642d9bf7..51eb6e95a4d 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -353,13 +353,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { return true; }; - // Don't let server sneak more lines into index file. - if buf.contains(&b'\n') { - return true; - } - if let Ok(buf) = std::str::from_utf8(buf) { let buf = buf.trim(); + + // Don't let server sneak more lines into index file. + if buf.contains('\n') { + return true; + } + // Append a new line to each so we can easily prepend to the index file. 
let mut s = String::with_capacity(buf.len() + 1); s.push_str(buf); From 3604da8483432f0e2d5161645363a53d7faa8281 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 23 Nov 2020 11:19:15 -0800 Subject: [PATCH 18/83] Don't overwrite last-updated if nothing changed --- src/cargo/sources/registry/http_remote.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 51eb6e95a4d..21d3a3c8d19 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -482,6 +482,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; // NOTE: Loop in case of rollover, in which case we need to fetch it starting at byte 0. + let was = self.at.get(); 'changelog: loop { // Reset in case we looped. handle.range("")?; @@ -618,7 +619,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // with the server. self.at.set(ChangelogState::Unsupported.into()); } - break; + break 'changelog; } 416 => { // 416 Range Not Satisfiable @@ -922,13 +923,16 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { self.config.updated_sources().insert(self.source_id); - // Record the latest known state of the index. - if !path.exists() { - paths::create_dir_all(&path)?; + // Record the latest known state of the index if it changed. 
+ let lu_file = path.join(LAST_UPDATED_FILE); + if !lu_file.exists() || was != self.at.get() { + if !path.exists() { + paths::create_dir_all(&path)?; + } + let mut file = paths::create(&lu_file)?; + file.write_all(self.at.get().1.as_bytes())?; + file.flush()?; } - let mut file = paths::create(&path.join(LAST_UPDATED_FILE))?; - file.write_all(self.at.get().1.as_bytes())?; - file.flush()?; Ok(()) } From 8f99f76c9f43e637c169f7e848464aea1d475d91 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 23 Nov 2020 11:19:32 -0800 Subject: [PATCH 19/83] Adopt more tests from registry.rs --- tests/testsuite/http_registry.rs | 780 ++++++++++++++++++++++++++++--- 1 file changed, 725 insertions(+), 55 deletions(-) diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 63099872722..096e91aa982 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -1,11 +1,18 @@ //! Tests for HTTP registry sources. +// Many of these tests are copied from registry.rs. +// It'd be nice if we could share them instead. +// Also, there are many tests in registry.rs that aren't specific to registry. +// It'd be nice if those were in their own module. + use cargo_test_support::paths::{self, CargoPathExt}; use cargo_test_support::registry::{ - registry_path, serve_registry, Package, RegistryServer, RegistryServerConfiguration, + registry_path, serve_registry, Dependency, Package, RegistryServer, RegistryServerConfiguration, }; -use cargo_test_support::{project, t}; +use cargo_test_support::t; +use cargo_test_support::{basic_manifest, project}; use std::fs; +use std::path::Path; fn setup(config: RegistryServerConfiguration) -> RegistryServer { let server = serve_registry(registry_path(), config); @@ -30,6 +37,25 @@ fn setup(config: RegistryServerConfiguration) -> RegistryServer { server } +macro_rules! 
test_w_wo_changelog { + ($name:ident) => { + mod $name { + use super::{$name, RegistryServerConfiguration}; + + #[cargo_test] + fn no_changelog() { + $name(RegistryServerConfiguration::NoChangelog); + } + + #[cargo_test] + fn changelog() { + $name(RegistryServerConfiguration::WithChangelog); + } + } + }; +} + +test_w_wo_changelog!(simple); fn simple(config: RegistryServerConfiguration) { let server = setup(config); let url = format!("http://{}/", server.addr()); @@ -79,16 +105,7 @@ fn simple(config: RegistryServerConfiguration) { .run(); } -#[cargo_test] -fn no_changelog_simple() { - simple(RegistryServerConfiguration::NoChangelog); -} - -#[cargo_test] -fn changelog_simple() { - simple(RegistryServerConfiguration::WithChangelog); -} - +test_w_wo_changelog!(deps); fn deps(config: RegistryServerConfiguration) { let server = setup(config); let url = format!("http://{}/", server.addr()); @@ -128,16 +145,7 @@ fn deps(config: RegistryServerConfiguration) { .run(); } -#[cargo_test] -fn no_changelog_deps() { - deps(RegistryServerConfiguration::NoChangelog); -} - -#[cargo_test] -fn changelog_deps() { - deps(RegistryServerConfiguration::WithChangelog); -} - +test_w_wo_changelog!(nonexistent); fn nonexistent(config: RegistryServerConfiguration) { let _server = setup(config); Package::new("init", "0.0.1").publish(); @@ -171,16 +179,7 @@ required by package `foo v0.0.1 ([..])` .run(); } -#[cargo_test] -fn no_changelog_nonexistent() { - nonexistent(RegistryServerConfiguration::NoChangelog); -} - -#[cargo_test] -fn changelog_nonexistent() { - nonexistent(RegistryServerConfiguration::WithChangelog); -} - +test_w_wo_changelog!(update_registry); fn update_registry(config: RegistryServerConfiguration) { let server = setup(config); let url = format!("http://{}/", server.addr()); @@ -230,16 +229,157 @@ required by package `foo v0.0.1 ([..])` .run(); } -#[cargo_test] -fn no_changelog_update_registry() { - update_registry(RegistryServerConfiguration::NoChangelog); -} 
+test_w_wo_changelog!(invalidate_index_on_rollover); +fn invalidate_index_on_rollover(config: RegistryServerConfiguration) { + let server = setup(config); + let url = format!("http://{}/", server.addr()); -#[cargo_test] -fn changelog_update_registry() { - update_registry(RegistryServerConfiguration::WithChangelog); + // First generate a Cargo.lock and a clone of the registry index at the + // "head" of the current registry. + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + Package::new("a", "0.1.0").publish(); + p.cargo("build").run(); + + // Fish out the path to the .last-updated file + let last_updated = if !matches!(config, RegistryServerConfiguration::NoChangelog) { + let dir = fs::read_dir(paths::home().join(".cargo/registry/index/")) + .unwrap() + .last() + .unwrap() + .unwrap(); + + Some(dir.path().join(".last-updated")) + } else { + None + }; + + if let Some(last_updated) = &last_updated { + // Check the contents of the last-updated file to see that it's on epoch 1. + assert_eq!( + fs::read_to_string(last_updated).unwrap(), + format!("1.{}", "1 YYYY-MM-DD HH:MM:SS a\n".len()), + "{}", + last_updated.display() + ); + } + + // Next, publish a new version and make the changelog roll over + Package::new("a", "0.1.1").publish(); + assert!(registry_path().join("changelog").exists(),); + fs::write( + registry_path().join("changelog"), + b"2 2020-11-23 09:45:09 a\n", + ) + .unwrap(); + + // Now, try to build a project that relies on the newly published version. + // It should realize it's not in cache, and update the registry. + // The registry should detect the rollover, invalidate the cache, + // and then succeed in fetching 0.1.1. 
+ let p2 = project() + .at("foo2") + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + // NOTE: we see UPDATING even when the changelog isn't used even though it is a no-op since + // update_index is called whenever a version is not in the index cache. + p2.cargo("build") + .with_stderr(format!( + "\ +[UPDATING] [..] +[DOWNLOADING] crates ... +[DOWNLOADED] a v0.1.1 (http registry `{reg}`) +[COMPILING] a v0.1.1 +[COMPILING] foo v0.5.0 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); + + if let Some(last_updated) = &last_updated { + // Check the contents of the last-updated file to see that it picked up the new epoch. + assert_eq!( + fs::read_to_string(last_updated).unwrap(), + format!("2.{}", "1 YYYY-MM-DD HH:MM:SS a\n".len()), + ); + } + + // Next, publish a new version and make the changelog empty (which is also a rollover) + Package::new("a", "0.1.2").publish(); + assert!(registry_path().join("changelog").exists(),); + fs::write(registry_path().join("changelog"), b"").unwrap(); + + // And again, build a project that depends on the new version. + // It should realize it's not in cache, and update the registry, + // which should again detect the rollover, invalidate the cache, + // and then succeed in fetching 0.1.2. + let p3 = project() + .at("foo3") + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + a = "0.1.2" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + // NOTE: again, we see UPDATING even when the changelog isn't used even though it is a no-op + // since update_index is called whenever a version is not in the index cache. + p3.cargo("build") + .with_stderr(format!( + "\ +[UPDATING] [..] +[DOWNLOADING] crates ... 
+[DOWNLOADED] a v0.1.2 (http registry `{reg}`) +[COMPILING] a v0.1.2 +[COMPILING] foo v0.5.0 ([CWD]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + reg = url + )) + .run(); + + if let Some(last_updated) = &last_updated { + // Check the contents of the last-updated file to see that it picked up the new epoch. + assert_eq!(fs::read_to_string(last_updated).unwrap(), "unsupported"); + } } +test_w_wo_changelog!(update_publish_then_update); fn update_publish_then_update(config: RegistryServerConfiguration) { let server = setup(config); let url = format!("http://{}/", server.addr()); @@ -324,16 +464,7 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { .run(); } -#[cargo_test] -fn no_changelog_update_publish_then_update() { - update_publish_then_update(RegistryServerConfiguration::NoChangelog); -} - -#[cargo_test] -fn changelog_update_publish_then_update() { - update_publish_then_update(RegistryServerConfiguration::WithChangelog); -} - +test_w_wo_changelog!(update_multiple_packages); fn update_multiple_packages(config: RegistryServerConfiguration) { let server = setup(config); let url = format!("http://{}/", server.addr()); @@ -395,12 +526,551 @@ fn update_multiple_packages(config: RegistryServerConfiguration) { .run(); } -#[cargo_test] -fn no_changelog_update_multiple_packages() { - update_multiple_packages(RegistryServerConfiguration::NoChangelog); +test_w_wo_changelog!(bundled_crate_in_registry); +fn bundled_crate_in_registry(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.1" + baz = "0.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.1.0").publish(); + Package::new("baz", "0.1.0") + .dep("bar", "0.1.0") + .file( + "Cargo.toml", + r#" + [package] + name = "baz" + version = "0.1.0" + authors = [] + + [dependencies] + bar = { 
path = "bar", version = "0.1.0" } + "#, + ) + .file("src/lib.rs", "") + .file("bar/Cargo.toml", &basic_manifest("bar", "0.1.0")) + .file("bar/src/lib.rs", "") + .publish(); + + p.cargo("run").run(); +} + +test_w_wo_changelog!(update_same_prefix_oh_my_how_was_this_a_bug); +fn update_same_prefix_oh_my_how_was_this_a_bug(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "ugh" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "0.1" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foobar", "0.2.0").publish(); + Package::new("foo", "0.1.0") + .dep("foobar", "0.2.0") + .publish(); + + p.cargo("generate-lockfile").run(); + p.cargo("update -pfoobar --precise=0.2.0").run(); +} + +test_w_wo_changelog!(use_semver); +fn use_semver(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "1.2.3-alpha.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "1.2.3-alpha.0").publish(); + + p.cargo("build").run(); +} + +test_w_wo_changelog!(use_semver_package_incorrectly); +fn use_semver_package_incorrectly(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [workspace] + members = ["a", "b"] + "#, + ) + .file( + "a/Cargo.toml", + r#" + [project] + name = "a" + version = "0.1.1-alpha.0" + authors = [] + "#, + ) + .file( + "b/Cargo.toml", + r#" + [project] + name = "b" + version = "0.1.0" + authors = [] + + [dependencies] + a = { version = "^0.1", path = "../a" } + "#, + ) + .file("a/src/main.rs", "fn main() {}") + .file("b/src/main.rs", "fn main() {}") + .build(); + + p.cargo("build") + .with_status(101) + .with_stderr( + "\ +error: no matching package named `a` found +location searched: [..] 
+prerelease package needs to be specified explicitly +a = { version = \"0.1.1-alpha.0\" } +required by package `b v0.1.0 ([..])` +", + ) + .run(); } -#[cargo_test] -fn changelog_update_multiple_packages() { - update_multiple_packages(RegistryServerConfiguration::WithChangelog); +test_w_wo_changelog!(only_download_relevant); +fn only_download_relevant(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [target.foo.dependencies] + foo = "*" + [dev-dependencies] + bar = "*" + [dependencies] + baz = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "0.1.0").publish(); + Package::new("bar", "0.1.0").publish(); + Package::new("baz", "0.1.0").publish(); + + p.cargo("build") + .with_stderr( + "\ +[UPDATING] `[..]` index +[DOWNLOADING] crates ... +[DOWNLOADED] baz v0.1.0 ([..]) +[COMPILING] baz v0.1.0 +[COMPILING] bar v0.5.0 ([..]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s +", + ) + .run(); +} + +test_w_wo_changelog!(resolve_and_backtracking); +fn resolve_and_backtracking(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("foo", "0.1.1") + .feature_dep("bar", "0.1", &["a", "b"]) + .publish(); + Package::new("foo", "0.1.0").publish(); + + p.cargo("build").run(); +} + +test_w_wo_changelog!(disallow_network); +fn disallow_network(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + foo = "*" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + // TODO: this should also check that we don't 
access the network for things we have in cache. + p.cargo("build --frozen") + .with_status(101) + .with_stderr( + "\ +[ERROR] failed to get `foo` as a dependency of package `bar v0.5.0 [..]` + +Caused by: + failed to load source for dependency `foo` + +Caused by: + Unable to update registry [..] + +Caused by: + failed to update replaced source registry `https://github.com/rust-lang/crates.io-index` + +Caused by: + attempting to make an HTTP request, but --frozen was specified +", + ) + .run(); +} + +test_w_wo_changelog!(add_dep_dont_update_registry); +fn add_dep_dont_update_registry(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ) + .file("src/main.rs", "fn main() {}") + .file( + "baz/Cargo.toml", + r#" + [project] + name = "baz" + version = "0.5.0" + authors = [] + + [dependencies] + remote = "0.3" + "#, + ) + .file("baz/src/lib.rs", "") + .build(); + + Package::new("remote", "0.3.4").publish(); + + p.cargo("build").run(); + + p.change_file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + remote = "0.3" + "#, + ); + + p.cargo("build") + .with_stderr( + "\ +[COMPILING] bar v0.5.0 ([..]) +[FINISHED] [..] 
+", + ) + .run(); +} + +test_w_wo_changelog!(bump_version_dont_update_registry); +fn bump_version_dont_update_registry(config: RegistryServerConfiguration) { + let _server = setup(config); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ) + .file("src/main.rs", "fn main() {}") + .file( + "baz/Cargo.toml", + r#" + [project] + name = "baz" + version = "0.5.0" + authors = [] + + [dependencies] + remote = "0.3" + "#, + ) + .file("baz/src/lib.rs", "") + .build(); + + Package::new("remote", "0.3.4").publish(); + + p.cargo("build").run(); + + p.change_file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.6.0" + authors = [] + + [dependencies] + baz = { path = "baz" } + "#, + ); + + p.cargo("build") + .with_stderr( + "\ +[COMPILING] bar v0.6.0 ([..]) +[FINISHED] [..] +", + ) + .run(); +} + +test_w_wo_changelog!(toml_lies_but_index_is_truth); +fn toml_lies_but_index_is_truth(config: RegistryServerConfiguration) { + let _server = setup(config); + Package::new("foo", "0.2.0").publish(); + Package::new("bar", "0.3.0") + .dep("foo", "0.2.0") + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.3.0" + authors = [] + + [dependencies] + foo = "0.1.0" + "#, + ) + .file("src/lib.rs", "extern crate foo;") + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "bar" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.3" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + p.cargo("build -v").run(); +} + +test_w_wo_changelog!(rename_deps_and_features); +fn rename_deps_and_features(config: RegistryServerConfiguration) { + let _server = setup(config); + Package::new("foo", "0.1.0") + .file("src/lib.rs", "pub fn f1() {}") + .publish(); + Package::new("foo", "0.2.0") + .file("src/lib.rs", "pub fn f2() {}") + .publish(); + Package::new("bar", "0.2.0") + .add_dep( + 
Dependency::new("foo01", "0.1.0") + .package("foo") + .optional(true), + ) + .add_dep(Dependency::new("foo02", "0.2.0").package("foo")) + .feature("another", &["foo01"]) + .file( + "src/lib.rs", + r#" + extern crate foo02; + #[cfg(feature = "foo01")] + extern crate foo01; + + pub fn foo() { + foo02::f2(); + #[cfg(feature = "foo01")] + foo01::f1(); + } + "#, + ) + .publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + bar = "0.2" + "#, + ) + .file( + "src/main.rs", + " + extern crate bar; + fn main() { bar::foo(); } + ", + ) + .build(); + + p.cargo("build").run(); + p.cargo("build --features bar/foo01").run(); + p.cargo("build --features bar/another").run(); +} + +test_w_wo_changelog!(ignore_invalid_json_lines); +fn ignore_invalid_json_lines(config: RegistryServerConfiguration) { + let _server = setup(config); + Package::new("foo", "0.1.0").publish(); + Package::new("foo", "0.1.1").invalid_json(true).publish(); + Package::new("foo", "0.2.0").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + foo = '0.1.0' + foo02 = { version = '0.2.0', package = 'foo' } + "#, + ) + .file("src/lib.rs", "") + .build(); + + p.cargo("build").run(); +} + +test_w_wo_changelog!(readonly_registry_still_works); +fn readonly_registry_still_works(config: RegistryServerConfiguration) { + let _server = setup(config); + Package::new("foo", "0.1.0").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + foo = '0.1.0' + "#, + ) + .file("src/lib.rs", "") + .build(); + + p.cargo("generate-lockfile").run(); + p.cargo("fetch --locked").run(); + chmod_readonly(&paths::home(), true); + p.cargo("build").run(); + // make sure we un-readonly the files afterwards so "cargo clean" can remove them (#6934) + chmod_readonly(&paths::home(), 
false); + + fn chmod_readonly(path: &Path, readonly: bool) { + for entry in t!(path.read_dir()) { + let entry = t!(entry); + let path = entry.path(); + if t!(entry.file_type()).is_dir() { + chmod_readonly(&path, readonly); + } else { + set_readonly(&path, readonly); + } + } + set_readonly(path, readonly); + } + + fn set_readonly(path: &Path, readonly: bool) { + let mut perms = t!(path.metadata()).permissions(); + perms.set_readonly(readonly); + t!(fs::set_permissions(path, perms)); + } } From f9b12623fcbc5bacd3785f09ce077d71274df440 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 24 Nov 2020 18:05:55 -0800 Subject: [PATCH 20/83] Push forward the greedy route This doesn't quite work yet. Something needs to call `prefetch`, and there's still an important TODO about making sure calls to `load` do not re-fetch files that have already been prefetched. --- src/cargo/sources/registry/http_remote.rs | 561 ++++++++++++++++++++-- src/cargo/sources/registry/index.rs | 114 ++++- src/cargo/sources/registry/mod.rs | 16 + 3 files changed, 638 insertions(+), 53 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 21d3a3c8d19..d8e232aec43 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -7,22 +7,28 @@ use crate::ops; use crate::sources::registry::make_dep_prefix; use crate::sources::registry::MaybeLock; use crate::sources::registry::{ - RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, + Fetched, RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, VERSION_TEMPLATE, }; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; use crate::util::{Config, Filesystem, Sha256}; -use curl::easy::{Easy, List}; +use curl::easy::{Easy, HttpVersion, List}; +use curl::multi::{EasyHandle, Multi}; use log::{debug, trace, warn}; use 
std::cell::{Cell, RefCell, RefMut}; +use std::collections::{HashMap, HashSet}; use std::fmt::Write as FmtWrite; use std::fs::{self, File, OpenOptions}; use std::io::prelude::*; use std::io::SeekFrom; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str; +use std::time::Duration; + +const ETAG: &'static [u8] = b"ETag"; +const LAST_MODIFIED: &'static [u8] = b"Last-Modified"; #[derive(Debug, Copy, Clone, PartialEq, Eq)] /// The last known state of the changelog. @@ -116,6 +122,9 @@ pub struct HttpRegistry<'cfg> { at: Cell<(ChangelogState, InternedString)>, checked_for_at: Cell, http: RefCell>, + prefetch: Multi, + multiplexing: bool, + downloads: Downloads, } impl<'cfg> HttpRegistry<'cfg> { @@ -128,6 +137,9 @@ impl<'cfg> HttpRegistry<'cfg> { at: Cell::new(ChangelogState::Unsupported.into()), checked_for_at: Cell::new(false), http: RefCell::new(None), + prefetch: Multi::new(), + multiplexing: false, + downloads: Downloads::default(), } } @@ -151,6 +163,25 @@ impl<'cfg> HttpRegistry<'cfg> { })) } } + + fn handle_http_header(buf: &[u8]) -> Option<(&[u8], &str)> { + if buf.is_empty() { + return None; + } + + let mut parts = buf.splitn(2, |&c| c == b':'); + let tag = parts.next().expect("first item of split is always Some"); + let rest = parts.next()?; + let rest = std::str::from_utf8(rest).ok()?; + let rest = rest.trim(); + + // Don't let server sneak extra lines anywhere. + if rest.contains('\n') { + return None; + } + + Some((tag, rest)) + } } const LAST_UPDATED_FILE: &str = ".last-updated"; @@ -209,9 +240,384 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { *http = Some(handle); } } + Ok(()) } + fn start_prefetch(&mut self) -> CargoResult { + // NOTE: lifted from src/cargo/core/package.rs + // + // We've enabled the `http2` feature of `curl` in Cargo, so treat + // failures here as fatal as it would indicate a build-time problem. + // + // Note that the multiplexing support is pretty new so we're having it + // off-by-default temporarily. 
+ // + // Also note that pipelining is disabled as curl authors have indicated + // that it's buggy, and we've empirically seen that it's buggy with HTTP + // proxies. + // + // TODO: Is that still the case? We probably want pipelining here if possible. + self.multiplexing = self.config.http_config()?.multiplexing.unwrap_or(true); + + self.prefetch + .pipelining(false, self.multiplexing) + .chain_err(|| "failed to enable multiplexing/pipelining in curl")?; + + // let's not flood crates.io with connections + self.prefetch.set_max_host_connections(2)?; + + Ok(true) + } + + fn prefetch(&mut self, root: &Path, path: &Path, req: &semver::VersionReq) -> CargoResult<()> { + // A quick overview of what goes on below: + // + // We first check if we have a local copy of the given index file. + // + // If we do, and the server has a changelog, then we know that the index file is up to + // date (as of when we last checked the changelog), so there's no need to double-check with + // the server that the file isn't stale. We can just tell the next call to + // `next_prefetched` to go ahead with this path immediately. If we _need_ a newer version + // of it, `update_index` will be called and then `prefetch` will be called again. + // + // If we do, but the server does not have a changelog, we need to check with the server if + // the index file has changed upstream. We do this using a conditional HTTP request using + // the `Last-Modified` and `ETag` headers we got when we fetched the currently cached index + // file (those headers are stored in the first two lines of each index file). That way, if + // nothing has changed (likely the common case), the server doesn't have to send us + // any data, just a 304 Not Modified. + // + // If we don't have a local copy of the index file, we need to fetch it from the server. 
+ let pkg = root.join(path); + let bytes; + let was = if pkg.exists() { + if self.at.get().0.is_synchronized() { + // We already have this file locally, and we don't need to double-check it with + // upstream because we have a changelog, so there's really nothing to prefetch. + // We do keep track of the request though so that we will eventually yield this + // back to the caller who may then want to prefetch other transitive dependencies. + if let Some(f) = self + .downloads + .eager + .iter_mut() + .find(|f| f.primary.path == path) + { + if &f.primary.req != req { + f.others.insert(req.clone()); + } + } else { + self.downloads.eager.push(MultiVersionFetched { + primary: Fetched { + path: path.to_path_buf(), + req: req.clone(), + }, + others: HashSet::new(), + }); + } + return Ok(()); + } + + // We have a local copy that we need to double-check the contents of. + // First, extract the `Last-Modified` and `Etag` headers. + trace!("prefetch load {} from disk", path.display()); + bytes = paths::read_bytes(&pkg)?; + let mut lines = bytes.splitn(3, |&c| c == b'\n'); + let etag = lines.next().expect("splitn always returns >=1 item"); + let last_modified = if let Some(lm) = lines.next() { + lm + } else { + anyhow::bail!("index file is missing HTTP header header"); + }; + let rest = if let Some(rest) = lines.next() { + rest + } else { + anyhow::bail!("index file is missing HTTP header header"); + }; + + assert!(!self.config.offline()); + debug!("double-checking freshness of {}", path.display()); + + let etag = std::str::from_utf8(etag)?; + let last_modified = std::str::from_utf8(last_modified)?; + Some((etag, last_modified, rest)) + } else { + None + }; + + // If the path is already being fetched, don't fetch it again. + // Just note down the version requirement and move on. 
+ if let Some(token) = self.downloads.pending_ids.get(path) { + let (dl, _) = &mut self.downloads.pending[token]; + if &dl.req != req { + dl.additional_reqs.insert(req.clone()); + } + return Ok(()); + } else if let Some(f) = self + .downloads + .eager + .iter_mut() + .find(|f| f.primary.path == path) + { + if &f.primary.req != req { + f.others.insert(req.clone()); + } + return Ok(()); + } + + // Looks like we're going to have to bite the bullet and do a network request. + let url = self.source_id.url(); + self.prepare()?; + + let mut handle = ops::http_handle(self.config)?; + debug!("fetch {}{}", url, path.display()); + handle.get(true)?; + handle.url(&format!("{}{}", url, path.display()))?; + handle.follow_location(true)?; + + // Enable HTTP/2 if possible. + if self.multiplexing { + try_old_curl!(handle.http_version(HttpVersion::V2), "HTTP2"); + } else { + handle.http_version(HttpVersion::V11)?; + } + + // This is an option to `libcurl` which indicates that if there's a + // bunch of parallel requests to the same host they all wait until the + // pipelining status of the host is known. This means that we won't + // initiate dozens of connections to crates.io, but rather only one. + // Once the main one is opened we realized that pipelining is possible + // and multiplexing is possible with static.crates.io. All in all this + // reduces the number of connections done to a more manageable state. + try_old_curl!(handle.pipewait(true), "pipewait"); + + // Make sure we don't send data back if it's the same as we have in the index. + if let Some((ref etag, ref last_modified, _)) = was { + let mut list = List::new(); + list.append(&format!("If-None-Match: {}", etag))?; + list.append(&format!("If-Modified-Since: {}", last_modified))?; + handle.http_headers(list)?; + } + + // We're going to have a bunch of downloads all happening "at the same time". + // So, we need some way to track what headers/data/responses are for which request. + // We do that through this token. 
Each request (and associated response) gets one. + let token = self.downloads.next; + self.downloads.next += 1; + debug!("downloading {} as {}", path.display(), token); + assert_eq!( + self.downloads.pending_ids.insert(path.to_path_buf(), token), + None, + "path queued for download more than once" + ); + let dl = Download { + token, + data: RefCell::new(Vec::new()), + path: path.to_path_buf(), + req: req.clone(), + additional_reqs: HashSet::new(), + etag: RefCell::new(None), + last_modified: RefCell::new(None), + }; + + // Each write should go to self.downloads.pending[&token].data. + // Since the write function must be 'static, we access downloads through a thread-local. + // That thread-local is set up in `next_prefetched` when it calls self.prefetch.perform, + // which is what ultimately calls this method. + handle.write_function(move |buf| { + debug!("{} - {} bytes of data", token, buf.len()); + tls::with(|downloads| { + if let Some(downloads) = downloads { + downloads.pending[&token] + .0 + .data + .borrow_mut() + .extend_from_slice(buf); + } + }); + Ok(buf.len()) + })?; + + // Same goes for the header function -- it goes through thread-local storage. + handle.header_function(move |buf| { + if let Some((tag, value)) = Self::handle_http_header(buf) { + let is_etag = buf.eq_ignore_ascii_case(ETAG); + let is_lm = buf.eq_ignore_ascii_case(LAST_MODIFIED); + if is_etag || is_lm { + debug!( + "{} - got header {}: {}", + token, + std::str::from_utf8(tag) + .expect("both ETAG and LAST_MODIFIED are valid strs"), + value + ); + + // Append a new line to each so we can easily prepend to the index file. 
+                let mut s = String::with_capacity(value.len() + 1);
+                s.push_str(value);
+                s.push('\n');
+                tls::with(|downloads| {
+                    if let Some(downloads) = downloads {
+                        let into = if is_etag {
+                            &downloads.pending[&token].0.etag
+                        } else {
+                            &downloads.pending[&token].0.last_modified
+                        };
+                        *into.borrow_mut() = Some(s);
+                    }
+                })
+            }
+        }
+
+        true
+    })?;
+
+    // TODO: Track and display download progress (see `Downloads` in `core/package.rs`).
+
+    // Finally add the request we've lined up to the pool of requests that cURL manages.
+    let mut handle = self.prefetch.add(handle)?;
+    handle.set_token(token);
+    self.downloads.pending.insert(dl.token, (dl, handle));
+
+    Ok(())
+}
+
+fn next_prefetched(&mut self) -> CargoResult<Option<Fetched>> {
+    while !self.downloads.pending.is_empty() && !self.downloads.eager.is_empty() {
+        // We may already have packages that are ready to go. This takes care of grabbing the
+        // next of those, while ensuring that we yield every distinct version requirement for
+        // each package.
+        if let Some(fetched) = self.downloads.eager.pop() {
+            return if let Some(req) = fetched.others.iter().next().cloned() {
+                fetched.others.remove(&req);
+                Ok(Some(Fetched {
+                    path: fetched.primary.path.clone(),
+                    req,
+                }))
+            } else {
+                Ok(Some(fetched.primary))
+            };
+        }
+
+        // We don't have any fetched results immediately ready to be yielded,
+        // so we need to check if curl has made any progress.
+        assert_eq!(
+            self.downloads.pending.len(),
+            self.downloads.pending_ids.len()
+        );
+        // Note the `tls::set` here which sets up the thread-local storage needed to access
+        // self.downloads from `write_function` and `header_function` above.
+        let remaining_in_multi = tls::set(&self.downloads, || {
+            self.prefetch
+                .perform()
+                .chain_err(|| "failed to perform http requests")
+        })?;
+        debug!("handles remaining: {}", remaining_in_multi);
+
+        // Walk all the messages cURL came across in case anything completed.
+        let results = &mut self.downloads.results;
+        let pending = &self.downloads.pending;
+        self.prefetch.messages(|msg| {
+            let token = msg.token().expect("failed to read token");
+            let handle = &pending[&token].1;
+            if let Some(result) = msg.result_for(handle) {
+                results.push((token, result));
+            } else {
+                debug!("message without a result (?)");
+            }
+        });
+
+        // Walk all the requests that completed and handle their responses.
+        //
+        // This will ultimately add more replies to self.downloads.eager, which we'll then
+        // yield to the caller on subsequent iterations of the enclosing loop.
+        while let Some((token, result)) = results.pop() {
+            debug!("{} finished with {:?}", token, result);
+
+            let (mut dl, handle) = self
+                .downloads
+                .pending
+                .remove(&token)
+                .expect("got a token for a non-in-progress transfer");
+
+            // TODO: Re-use this memory for another download?
+            let data = dl.data.into_inner();
+            let mut handle = self.prefetch.remove(handle)?;
+            self.downloads.pending_ids.remove(&dl.path);
+
+            let fetched = MultiVersionFetched {
+                primary: Fetched {
+                    path: dl.path,
+                    req: dl.req,
+                },
+                others: dl.additional_reqs,
+            };
+
+            let code = handle.response_code()?;
+            debug!(
+                "index file downloaded with status code {}",
+                handle.response_code()?
+            );
+            // TODO: How do we ensure that the next call to load doesn't _also_ send an HTTP
+            // request? Do we need to keep track of each fetched prefetched path or something?
+            match code {
+                200 => {
+                    // We got data back, hooray!
+                    // Let's update the index file.
+                    let path = self.config.assert_package_cache_locked(&self.index_path);
+                    let pkg = path.join(&fetched.primary.path);
+                    paths::create_dir_all(pkg.parent().expect("pkg is a file"))?;
+                    let mut file = paths::create(pkg)?;
+                    file.write_all(dl.etag.into_inner().as_deref().unwrap_or("\n").as_bytes())?;
+                    file.write_all(
+                        dl.last_modified
+                            .into_inner()
+                            .as_deref()
+                            .unwrap_or("\n")
+                            .as_bytes(),
+                    )?;
+                    file.write_all(&data)?;
+                    file.flush()?;
+
+                    self.downloads.eager.push(fetched);
+                }
+                304 => {
+                    // Not Modified response.
+ // There's nothing for us to do -- the index file is up to date. + // The only thing that matters is telling the caller about this package. + self.downloads.eager.push(fetched); + } + 404 | 410 | 451 => { + // The crate was deleted from the registry. + todo!(); + } + code => { + anyhow::bail!("server returned unexpected HTTP status code {}", code); + } + } + } + + if !self.downloads.eager.is_empty() { + continue; + } + + if self.downloads.pending.is_empty() { + // We're all done! + return Ok(None); + } + + // We have no more replies to provide the caller with, + // so we need to wait until cURL has something new for us. + let timeout = self + .prefetch + .get_timeout()? + .unwrap_or_else(|| Duration::new(5, 0)); + self.prefetch + .wait(&mut [], timeout) + .chain_err(|| "failed to wait on curl `Multi`")?; + } + Ok(None) + } + fn index_path(&self) -> &Filesystem { // NOTE: I'm pretty sure this method is unnecessary. // The only place it is used is to set `.path` in `RegistryIndex`, @@ -238,24 +644,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()> { - // A quick overview of what goes on below: - // - // We first check if we have a local copy of the given index file. - // - // If we do, and the server has a changelog, then we know that the index file is up to - // date (as of when we last checked the changelog), so there's no need to double-check with - // the server that the file isn't stale. We can just return its contents directly. If we - // _need_ a newer version of it, `update_index` will be called and then `load` will be - // called again. - // - // If we do, but the server does not have a changelog, we need to check with the server if - // the index file has changed upstream. 
We do this using a conditional HTTP request using - // the `Last-Modified` and `ETag` headers we got when we fetched the currently cached index - // file (those headers are stored in the first two lines of each index file). That way, if - // nothing has changed (likely the common case), the server doesn't have to send us - // any data, just a 304 Not Modified. - // - // If we don't have a local copy of the index file, we need to fetch it from the server. + // NOTE: This is pretty much a synchronous version of the prefetch() + next_prefetched() + // dance. Much of the code is sort-of duplicated, which isn't great, but it works. + let pkg = root.join(path); let bytes; let was = if pkg.exists() { @@ -339,36 +730,19 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Capture ETag and Last-Modified. transfer.header_function(|buf| { - const ETAG: &'static [u8] = b"ETag:"; - const LAST_MODIFIED: &'static [u8] = b"Last-Modified:"; - - let (tag, buf) = - if buf.len() >= ETAG.len() && buf[..ETAG.len()].eq_ignore_ascii_case(ETAG) { - (ETAG, &buf[ETAG.len()..]) - } else if buf.len() >= LAST_MODIFIED.len() - && buf[..LAST_MODIFIED.len()].eq_ignore_ascii_case(LAST_MODIFIED) - { - (LAST_MODIFIED, &buf[LAST_MODIFIED.len()..]) - } else { - return true; - }; - - if let Ok(buf) = std::str::from_utf8(buf) { - let buf = buf.trim(); - - // Don't let server sneak more lines into index file. - if buf.contains('\n') { - return true; - } - - // Append a new line to each so we can easily prepend to the index file. - let mut s = String::with_capacity(buf.len() + 1); - s.push_str(buf); - s.push('\n'); - if tag == ETAG { - etag = Some(s); - } else if tag == LAST_MODIFIED { - last_modified = Some(s); + if let Some((tag, value)) = Self::handle_http_header(buf) { + let is_etag = buf.eq_ignore_ascii_case(ETAG); + let is_lm = buf.eq_ignore_ascii_case(LAST_MODIFIED); + if is_etag || is_lm { + // Append a new line to each so we can easily prepend to the index file. 
+ let mut s = String::with_capacity(value.len() + 1); + s.push_str(value); + s.push('\n'); + if is_etag { + etag = Some(s); + } else if is_lm { + last_modified = Some(s); + } } } @@ -1021,3 +1395,88 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { false } } + +struct MultiVersionFetched { + primary: Fetched, + others: HashSet, +} + +// NOTE: what follows is lifted from src/cargo/core/package.rs and tweaked + +/// Helper for downloading crates. +#[derive(Default)] +pub struct Downloads { + /// When a download is started, it is added to this map. The key is a + /// "token" (see `Download::token`). It is removed once the download is + /// finished. + pending: HashMap, + /// Set of paths currently being downloaded, mapped to their tokens. + /// This should stay in sync with `pending`. + pending_ids: HashMap, + /// The final result of each download. A pair `(token, result)`. This is a + /// temporary holding area, needed because curl can report multiple + /// downloads at once, but the main loop (`wait`) is written to only + /// handle one at a time. + results: Vec<(usize, Result<(), curl::Error>)>, + /// Prefetch requests that we already have a response to. + eager: Vec, + /// The next ID to use for creating a token (see `Download::token`). + next: usize, + /// Indicates *all* downloads were successful. + success: bool, +} + +struct Download { + /// The token for this download, used as the key of the `Downloads::pending` map + /// and stored in `EasyHandle` as well. + token: usize, + + /// The package that we're downloading. + path: PathBuf, + + /// The version requirements for the dependency line that triggered this fetch. + req: semver::VersionReq, + + /// Additional version requirements for same package. + additional_reqs: HashSet, + + /// Actual downloaded data, updated throughout the lifetime of this download. + data: RefCell>, + + /// ETag and Last-Modified headers received from the server (if any). 
+ etag: RefCell>, + last_modified: RefCell>, +} + +mod tls { + use std::cell::Cell; + + use super::Downloads; + + thread_local!(static PTR: Cell = Cell::new(0)); + + pub(crate) fn with(f: impl FnOnce(Option<&Downloads>) -> R) -> R { + let ptr = PTR.with(|p| p.get()); + if ptr == 0 { + f(None) + } else { + unsafe { f(Some(&*(ptr as *const Downloads))) } + } + } + + pub(crate) fn set(dl: &Downloads, f: impl FnOnce() -> R) -> R { + struct Reset<'a, T: Copy>(&'a Cell, T); + + impl<'a, T: Copy> Drop for Reset<'a, T> { + fn drop(&mut self) { + self.0.set(self.1); + } + } + + PTR.with(|p| { + let _reset = Reset(p, p.get()); + p.set(dl as *const Downloads as usize); + f() + }) + } +} diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index f7690f3d652..fd3e71b6676 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -68,7 +68,7 @@ use crate::core::dependency::Dependency; use crate::core::{PackageId, SourceId, Summary}; -use crate::sources::registry::{RegistryData, RegistryPackage}; +use crate::sources::registry::{make_dep_prefix, RegistryData, RegistryPackage}; use crate::util::interning::InternedString; use crate::util::paths; use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver}; @@ -351,6 +351,7 @@ impl<'cfg> RegistryIndex<'cfg> { root, &cache_root, path.as_ref(), + req, self.source_id, load, self.config, @@ -458,6 +459,115 @@ impl<'cfg> RegistryIndex<'cfg> { } impl Summaries { + pub fn prefetch<'a>( + index_version: Option<&str>, + root: &Path, + cache_root: &Path, + deps: &[Dependency], + source_id: SourceId, + load: &mut dyn RegistryData, + config: &Config, + ) -> CargoResult<()> { + // For some registry backends, it's expensive to fetch each individual index file, and the + // process can be sped up significantly by fetching many index files in advance. 
For + // backends where that is the case, we do an approximate walk of all transitive + // dependencies and fetch their index file in a pipelined fashion. This means that by the + // time the individual loads (see load.load in Summary::parse), those should all be quite + // fast. + // + // We have the advantage here of being able to play fast and loose with the exact + // dependency requirements. It's fine if we fetch a bit too much, since the incremental + // cost of each index file is small. It's even fine if we fetch too few index files -- + // they'll just have to be fetched on the slow path later. + if config.offline() || !load.start_prefetch()? { + // Backend does not support prefetching. + } + + log::debug!("prefetching transitive dependencies"); + + let relative = |name: &str| { + let mut prefix = make_dep_prefix(name); + prefix.push('/'); + prefix.push_str(name); + prefix + }; + + // Seed the prefetching with the root dependencies. + for dep in deps { + // TODO: Skip if in cache? + load.prefetch( + root, + Path::new(&relative(&dep.package_name())), + dep.version_req(), + )?; + } + + // Now, continuously iterate by walking dependencies we've loaded and fetching the index + // entry for _their_ dependencies. + while let Some(fetched) = load.next_prefetched()? { + // TODO: make use of RegistryIndex::summaries_cache + let summaries = Self::parse( + index_version, + root, + cache_root, + &fetched.path, + source_id, + load, + config, + )?; + + let summaries = if let Some(s) = summaries { s } else { continue }; + + for (version, maybe_summary) in summaries.versions { + if !fetched.req.matches(&version) { + // The crate that pulled in this crate as a dependency did not care about this + // particular version, so we don't need to walk its dependencies. + // + // We _could_ simply walk every transitive dependency, and it probably wouldn't + // be _that_ bad. 
But over time it'd mean that a bunch of index files are + // pulled down even though they're no longer used anywhere in the dependency + // closure. This, again, probably doesn't matter, and it would make the logic + // here _much_ simpler, but for now we try to do better. + // + // Note that another crate in the dependency closure might still pull in this + // version because that crate has a different set of requirements. + continue; + } + + let summary = maybe_summary.parse(config, &summaries.raw_data, source_id)?; + + if summary.yanked { + // This version has been yanked, so let's not even go there. + continue; + } + + for dep in summary.summary.dependencies() { + if dep.source_id() != source_id { + // This dependency lives in a different source, so we won't be prefetching + // anything from there anyway. + // + // It is _technically_ possible that a dependency in a different source + // then pulls in a dependency from _this_ source again, but we'll let that + // go to the slow path. + continue; + } + + let raw_path = relative(&*dep.package_name()); + for relative in UncanonicalizedIter::new(&raw_path).take(1024) { + // NOTE: Many of these prefetches will "miss", but that's okay. + // They're going to be pipelined anyway. + load.prefetch(root, Path::new(&relative), dep.version_req()); + } + + // TODO: make sure that the things we prefetch do not get + // double-checked later on _unless_ there has been an update_index. + } + } + } + + Ok(()) + } + /// Parse out a `Summaries` instances from on-disk state. /// /// This will attempt to prefer parsing a previous cache file that already @@ -485,7 +595,7 @@ impl Summaries { cache_root: &Path, relative: &Path, source_id: SourceId, - load: &mut dyn RegistryData, + load: &dyn RegistryData, config: &Config, ) -> CargoResult> { // First up, attempt to load the cache. 
This could fail for all manner diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index caaca8c391e..308034471d2 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -369,9 +369,25 @@ impl<'a> RegistryDependency<'a> { } } +pub struct Fetched { + path: PathBuf, + req: semver::VersionReq, +} + pub trait RegistryData { fn prepare(&self) -> CargoResult<()>; fn index_path(&self) -> &Filesystem; + + fn start_prefetch(&mut self) -> CargoResult { + Ok(false) + } + fn prefetch(&mut self, root: &Path, path: &Path, req: &semver::VersionReq) -> CargoResult<()> { + Ok(()) + } + fn next_prefetched(&mut self) -> CargoResult> { + Ok(None) + } + fn load( &self, root: &Path, From 42453f50e990090665687c1acaba4177774f5c5c Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 10:17:12 -0800 Subject: [PATCH 21/83] Progress on greedy prefetching --- src/cargo/core/registry.rs | 42 ++++++++++ src/cargo/core/resolver/dep_cache.rs | 11 +++ src/cargo/core/source/mod.rs | 13 ++++ src/cargo/sources/directory.rs | 5 ++ src/cargo/sources/git/source.rs | 5 ++ src/cargo/sources/path.rs | 5 ++ src/cargo/sources/registry/http_remote.rs | 70 +++++++++++++---- src/cargo/sources/registry/index.rs | 94 ++++++++++++++--------- src/cargo/sources/registry/mod.rs | 16 +++- src/cargo/sources/replaced.rs | 17 ++++ 10 files changed, 225 insertions(+), 53 deletions(-) diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index 0380c447d39..53775e25e4e 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use crate::core::PackageSet; @@ -15,6 +16,9 @@ use url::Url; /// /// See also `core::Source`. pub trait Registry { + /// Give source the opportunity to batch pre-fetch dependency information. 
+ fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; + /// Attempt to find the packages that match a dependency request. fn query( &mut self, @@ -482,6 +486,44 @@ https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html } impl<'cfg> Registry for PackageRegistry<'cfg> { + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + assert!(self.patches_locked); + + if self.sources.len() == 1 { + // Fast path -- there is only one source, so no need to partition by SourceId. + self.sources + .sources_mut() + .next() + .unwrap() + .1 + .prefetch(deps)?; + } else { + // We need to partition deps so that we can prefetch dependencies from different + // sources. Note that we do not prefetch from overrides. + let mut deps_per_source = HashMap::new(); + for dep in deps { + deps_per_source + .entry(dep.source_id()) + .or_insert_with(Vec::new) + .push(dep); + } + + for (s, deps) in deps_per_source { + // Ensure the requested source_id is loaded + self.ensure_loaded(s, Kind::Normal).chain_err(|| { + anyhow::format_err!("failed to load source for dependency prefetching",) + })?; + + self.sources + .get_mut(s) + .unwrap() + .prefetch(&mut deps.into_iter())?; + } + } + + Ok(()) + } + fn query( &mut self, dep: &Dependency, diff --git a/src/cargo/core/resolver/dep_cache.rs b/src/cargo/core/resolver/dep_cache.rs index 1f6c49ca0fb..1f292289be3 100644 --- a/src/cargo/core/resolver/dep_cache.rs +++ b/src/cargo/core/resolver/dep_cache.rs @@ -19,6 +19,7 @@ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::Config; use log::debug; +use std::borrow::Cow; use std::cmp::Ordering; use std::collections::{BTreeSet, HashMap, HashSet}; use std::rc::Rc; @@ -263,6 +264,16 @@ impl<'a> RegistryQueryer<'a> { // for our own dependencies. let (used_features, deps) = resolve_features(parent, candidate, opts)?; + // Then, allow the source to batch pre-fetch dependencies we may need. 
+ self.registry + .prefetch(&mut deps.iter().map(|(d, _)| Cow::Borrowed(d))) + .chain_err(|| { + anyhow::format_err!( + "failed to prefetch dependencies of {}", + describe_path(&cx.parents.path_to_bottom(&candidate.package_id())), + ) + })?; + // Next, transform all dependencies into a list of possible candidates // which can satisfy that dependency. let mut deps = deps diff --git a/src/cargo/core/source/mod.rs b/src/cargo/core/source/mod.rs index f61e9636374..00a3980bedc 100644 --- a/src/cargo/core/source/mod.rs +++ b/src/cargo/core/source/mod.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::hash_map::HashMap; use std::fmt; @@ -27,6 +28,9 @@ pub trait Source { /// the `precise` field in the source id listed. fn requires_precise(&self) -> bool; + /// Give source the opportunity to batch pre-fetch dependency information. + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; + /// Attempts to find the packages that match a dependency request. fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()>; @@ -129,6 +133,11 @@ impl<'a, T: Source + ?Sized + 'a> Source for Box { (**self).requires_precise() } + /// Forwards to `Source::prefetch`. + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + (**self).prefetch(deps) + } + /// Forwards to `Source::query`. 
fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { (**self).query(dep, f) @@ -197,6 +206,10 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { (**self).requires_precise() } + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + (**self).prefetch(deps) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { (**self).query(dep, f) } diff --git a/src/cargo/sources/directory.rs b/src/cargo/sources/directory.rs index 3e6daf034b8..45939ca339e 100644 --- a/src/cargo/sources/directory.rs +++ b/src/cargo/sources/directory.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; use std::path::{Path, PathBuf}; @@ -42,6 +43,10 @@ impl<'cfg> Debug for DirectorySource<'cfg> { } impl<'cfg> Source for DirectorySource<'cfg> { + fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { let packages = self.packages.values().map(|p| &p.0); let matches = packages.filter(|pkg| dep.matches(pkg.summary())); diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 3e66dd3cda8..417611c782e 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -8,6 +8,7 @@ use crate::util::hex::short_hash; use crate::util::Config; use anyhow::Context; use log::trace; +use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use url::Url; @@ -83,6 +84,10 @@ impl<'cfg> Debug for GitSource<'cfg> { } impl<'cfg> Source for GitSource<'cfg> { + fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { let src = self .path_source diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 64b0f77ed5a..f898e04bcb6 100644 --- a/src/cargo/sources/path.rs +++ 
b/src/cargo/sources/path.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; use std::fs; use std::path::{Path, PathBuf}; @@ -469,6 +470,10 @@ impl<'cfg> Debug for PathSource<'cfg> { } impl<'cfg> Source for PathSource<'cfg> { + fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { for s in self.packages.iter().map(|p| p.summary()) { if dep.matches(s) { diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index d8e232aec43..778e955fa69 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -113,6 +113,14 @@ impl ToString for ChangelogState { /// the client to fetch the changelog, invalidate its locally cached index files for only the /// changed crates, and then not worry about double-checking with the server for each index file. /// +/// In order to take advantage of HTTP/2's ability to efficiently send multiple concurrent HTTP +/// requests over a single connection, `HttpRegistry` also supports asynchronous prefetching. The +/// caller queues up a number of index files they think it is likely they will want to access, and +/// `HttpRegistry` fires off requests for each one without synchronously waiting for the response. +/// The caller then drives the processing of the responses, which update the index files that are +/// stored on disk, before moving on to the _actual_ dependency resolution. See +/// [`RegistryIndex::prefetch`] for more details. 
+/// /// [RFC XXX]: https://github.com/rust-lang/rfcs/pull/2789 pub struct HttpRegistry<'cfg> { index_path: Filesystem, @@ -124,6 +132,7 @@ pub struct HttpRegistry<'cfg> { http: RefCell>, prefetch: Multi, multiplexing: bool, + prefetched: bool, downloads: Downloads, } @@ -139,6 +148,7 @@ impl<'cfg> HttpRegistry<'cfg> { http: RefCell::new(None), prefetch: Multi::new(), multiplexing: false, + prefetched: false, downloads: Downloads::default(), } } @@ -267,10 +277,18 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // let's not flood crates.io with connections self.prefetch.set_max_host_connections(2)?; + self.prefetched = true; + Ok(true) } - fn prefetch(&mut self, root: &Path, path: &Path, req: &semver::VersionReq) -> CargoResult<()> { + fn prefetch( + &mut self, + root: &Path, + path: &Path, + name: InternedString, + req: &semver::VersionReq, + ) -> CargoResult<()> { // A quick overview of what goes on below: // // We first check if we have a local copy of the given index file. @@ -310,6 +328,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { self.downloads.eager.push(MultiVersionFetched { primary: Fetched { path: path.to_path_buf(), + name, req: req.clone(), }, others: HashSet::new(), @@ -348,7 +367,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // If the path is already being fetched, don't fetch it again. // Just note down the version requirement and move on. 
if let Some(token) = self.downloads.pending_ids.get(path) { - let (dl, _) = &mut self.downloads.pending[token]; + let (dl, _) = self + .downloads + .pending + .get_mut(token) + .expect("invalid token"); if &dl.req != req { dl.additional_reqs.insert(req.clone()); } @@ -414,6 +437,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { token, data: RefCell::new(Vec::new()), path: path.to_path_buf(), + name, req: req.clone(), additional_reqs: HashSet::new(), etag: RefCell::new(None), @@ -476,7 +500,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Finally add the request we've lined up to the pool of requests that cURL manages. let mut handle = self.prefetch.add(handle)?; - handle.set_token(token); + handle.set_token(token)?; self.downloads.pending.insert(dl.token, (dl, handle)); Ok(()) @@ -487,13 +511,16 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // We may already have packages that are ready to go. This takes care of grabbing the // next of those, while ensuring that we yield every distinct version requirement for // each package. - if let Some(fetched) = self.downloads.eager.pop() { + if let Some(mut fetched) = self.downloads.eager.pop() { return if let Some(req) = fetched.others.iter().next().cloned() { fetched.others.remove(&req); - Ok(Some(Fetched { + let ret = Ok(Some(Fetched { path: fetched.primary.path.clone(), + name: fetched.primary.name, req, - })) + })); + self.downloads.eager.push(fetched); + ret } else { Ok(Some(fetched.primary)) }; @@ -533,13 +560,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { while let Some((token, result)) = results.pop() { debug!("{} finished with {:?}", token, result); - let (mut dl, handle) = self + let (dl, handle) = self .downloads .pending .remove(&token) .expect("got a token for a non-in-progress transfer"); - // TODO: Re-use this memory for another download? 
let data = dl.data.into_inner(); let mut handle = self.prefetch.remove(handle)?; self.downloads.pending_ids.remove(&dl.path); @@ -547,6 +573,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let fetched = MultiVersionFetched { primary: Fetched { path: dl.path, + name: dl.name, req: dl.req, }, others: dl.additional_reqs, @@ -586,7 +613,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // The only thing that matters is telling the caller about this package. self.downloads.eager.push(fetched); } - 404 | 410 | 451 => { + 404 => { + // Not Found response. + // The crate doesn't exist, so we simply do not yield it. + } + 410 | 451 => { // The crate was deleted from the registry. todo!(); } @@ -671,7 +702,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let double_check = !self.at.get().0.is_synchronized() || path.ends_with("config.json"); if double_check { - if self.config.offline() { + if self.prefetched { + trace!( + "not double-checking freshness of {} after prefetch", + path.display() + ); + } else if self.config.offline() { debug!( "not double-checking freshness of {} due to offline", path.display() @@ -731,8 +767,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Capture ETag and Last-Modified. transfer.header_function(|buf| { if let Some((tag, value)) = Self::handle_http_header(buf) { - let is_etag = buf.eq_ignore_ascii_case(ETAG); - let is_lm = buf.eq_ignore_ascii_case(LAST_MODIFIED); + let is_etag = tag.eq_ignore_ascii_case(ETAG); + let is_lm = tag.eq_ignore_ascii_case(LAST_MODIFIED); if is_etag || is_lm { // Append a new line to each so we can easily prepend to the index file. let mut s = String::with_capacity(value.len() + 1); @@ -829,6 +865,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { debug!("updating the index"); + // Make sure that subsequent loads double-check with the server again. 
+ self.prefetched = false; + self.prepare()?; let path = self.config.assert_package_cache_locked(&self.index_path); self.config @@ -1422,8 +1461,6 @@ pub struct Downloads { eager: Vec, /// The next ID to use for creating a token (see `Download::token`). next: usize, - /// Indicates *all* downloads were successful. - success: bool, } struct Download { @@ -1431,9 +1468,12 @@ struct Download { /// and stored in `EasyHandle` as well. token: usize, - /// The package that we're downloading. + /// The path of the package that we're downloading. path: PathBuf, + /// The name of the package that we're downloading. + name: InternedString, + /// The version requirements for the dependency line that triggered this fetch. req: semver::VersionReq, diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index fd3e71b6676..4201c1f1fc2 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -74,7 +74,8 @@ use crate::util::paths; use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver}; use log::info; use semver::{Version, VersionReq}; -use std::collections::{HashMap, HashSet}; +use std::borrow::Cow; +use std::collections::{hash_map::Entry, HashMap, HashSet}; use std::fs; use std::path::Path; use std::str; @@ -351,7 +352,6 @@ impl<'cfg> RegistryIndex<'cfg> { root, &cache_root, path.as_ref(), - req, self.source_id, load, self.config, @@ -456,17 +456,11 @@ impl<'cfg> RegistryIndex<'cfg> { .any(|summary| summary.yanked); Ok(found) } -} -impl Summaries { - pub fn prefetch<'a>( - index_version: Option<&str>, - root: &Path, - cache_root: &Path, - deps: &[Dependency], - source_id: SourceId, + pub fn prefetch( + &mut self, + deps: &mut dyn Iterator>, load: &mut dyn RegistryData, - config: &Config, ) -> CargoResult<()> { // For some registry backends, it's expensive to fetch each individual index file, and the // process can be sped up significantly by fetching many index files in advance. 
For @@ -479,10 +473,17 @@ impl Summaries { // dependency requirements. It's fine if we fetch a bit too much, since the incremental // cost of each index file is small. It's even fine if we fetch too few index files -- // they'll just have to be fetched on the slow path later. - if config.offline() || !load.start_prefetch()? { + if self.config.offline() || !load.start_prefetch()? { // Backend does not support prefetching. + return Ok(()); } + load.prepare()?; + + let root = load.assert_index_locked(&self.path); + let cache_root = root.join(".cache"); + let index_version = load.current_version(); + log::debug!("prefetching transitive dependencies"); let relative = |name: &str| { @@ -494,31 +495,45 @@ impl Summaries { // Seed the prefetching with the root dependencies. for dep in deps { - // TODO: Skip if in cache? - load.prefetch( - root, - Path::new(&relative(&dep.package_name())), - dep.version_req(), - )?; + let raw_path = relative(&*dep.package_name()); + for relative in UncanonicalizedIter::new(&raw_path).take(1024) { + load.prefetch( + root, + &Path::new(&relative), + dep.package_name(), + dep.version_req(), + )?; + } } // Now, continuously iterate by walking dependencies we've loaded and fetching the index // entry for _their_ dependencies. while let Some(fetched) = load.next_prefetched()? 
{ - // TODO: make use of RegistryIndex::summaries_cache - let summaries = Self::parse( - index_version, - root, - cache_root, - &fetched.path, - source_id, - load, - config, - )?; - - let summaries = if let Some(s) = summaries { s } else { continue }; + let summaries = if let Some(s) = self.summaries_cache.get_mut(&fetched.name) { + s + } else { + let summaries = Summaries::parse( + index_version.as_deref(), + root, + &cache_root, + &fetched.path, + self.source_id, + load, + self.config, + )?; + + let summaries = if let Some(s) = summaries { s } else { continue }; + + match self.summaries_cache.entry(fetched.name) { + Entry::Vacant(v) => v.insert(summaries), + Entry::Occupied(mut o) => { + let _ = o.insert(summaries); + o.into_mut() + } + } + }; - for (version, maybe_summary) in summaries.versions { + for (version, maybe_summary) in &mut summaries.versions { if !fetched.req.matches(&version) { // The crate that pulled in this crate as a dependency did not care about this // particular version, so we don't need to walk its dependencies. @@ -534,7 +549,8 @@ impl Summaries { continue; } - let summary = maybe_summary.parse(config, &summaries.raw_data, source_id)?; + let summary = + maybe_summary.parse(self.config, &summaries.raw_data, self.source_id)?; if summary.yanked { // This version has been yanked, so let's not even go there. @@ -542,7 +558,7 @@ impl Summaries { } for dep in summary.summary.dependencies() { - if dep.source_id() != source_id { + if dep.source_id() != self.source_id { // This dependency lives in a different source, so we won't be prefetching // anything from there anyway. // @@ -556,18 +572,22 @@ impl Summaries { for relative in UncanonicalizedIter::new(&raw_path).take(1024) { // NOTE: Many of these prefetches will "miss", but that's okay. // They're going to be pipelined anyway. 
- load.prefetch(root, Path::new(&relative), dep.version_req()); + load.prefetch( + root, + Path::new(&relative), + dep.package_name(), + dep.version_req(), + )?; } - - // TODO: make sure that the things we prefetch do not get - // double-checked later on _unless_ there has been an update_index. } } } Ok(()) } +} +impl Summaries { /// Parse out a `Summaries` instances from on-disk state. /// /// This will attempt to prefer parsing a previous cache file that already diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 308034471d2..b62174f1ccc 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -370,6 +370,7 @@ impl<'a> RegistryDependency<'a> { } pub struct Fetched { + name: InternedString, path: PathBuf, req: semver::VersionReq, } @@ -381,7 +382,14 @@ pub trait RegistryData { fn start_prefetch(&mut self) -> CargoResult { Ok(false) } - fn prefetch(&mut self, root: &Path, path: &Path, req: &semver::VersionReq) -> CargoResult<()> { + // Must over-approximate. + fn prefetch( + &mut self, + _: &Path, + _: &Path, + _: InternedString, + _: &semver::VersionReq, + ) -> CargoResult<()> { Ok(()) } fn next_prefetched(&mut self) -> CargoResult> { @@ -591,6 +599,12 @@ impl<'cfg> RegistrySource<'cfg> { } impl<'cfg> Source for RegistrySource<'cfg> { + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + // TODO: conditional index update? + self.index.prefetch(deps, &mut *self.ops)?; + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { // If this is a precise dependency, then it came from a lock file and in // theory the registry is known to contain this version. 
If, however, we diff --git a/src/cargo/sources/replaced.rs b/src/cargo/sources/replaced.rs index 7f4a622fd84..04b0920e038 100644 --- a/src/cargo/sources/replaced.rs +++ b/src/cargo/sources/replaced.rs @@ -1,6 +1,7 @@ use crate::core::source::MaybePackage; use crate::core::{Dependency, Package, PackageId, Source, SourceId, Summary}; use crate::util::errors::{CargoResult, CargoResultExt}; +use std::borrow::Cow; pub struct ReplacedSource<'cfg> { to_replace: SourceId, @@ -39,6 +40,22 @@ impl<'cfg> Source for ReplacedSource<'cfg> { self.inner.requires_precise() } + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + let (replace_with, to_replace) = (self.replace_with, self.to_replace); + self.inner + .prefetch( + &mut deps + .map(|dep| Cow::Owned(dep.into_owned().map_source(to_replace, replace_with))), + ) + .chain_err(|| { + format!( + "failed to prefetch from replaced source {}", + self.to_replace + ) + })?; + Ok(()) + } + fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()> { let (replace_with, to_replace) = (self.replace_with, self.to_replace); let dep = dep.clone().map_source(to_replace, replace_with); From 5568cb66cb3571c429decf9fc4be21658ac5f571 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:01:55 -0800 Subject: [PATCH 22/83] Document fields --- src/cargo/sources/registry/http_remote.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 778e955fa69..1d23db75ea3 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -127,13 +127,29 @@ pub struct HttpRegistry<'cfg> { cache_path: Filesystem, source_id: SourceId, config: &'cfg Config, + + /// The current (last known) state of the changelog. at: Cell<(ChangelogState, InternedString)>, + + /// Have we loaded self.at from .last-updated (by calling prepare) yet? 
checked_for_at: Cell, + + /// Cached HTTP handle for synchronous requests (changelog + RegistryData::load). http: RefCell>, + + /// HTTP multi-handle for asynchronous/parallel requests during prefetching. prefetch: Multi, + + /// State for currently pending prefetch downloads. + downloads: Downloads, + + /// Does the config say that we can use HTTP multiplexing? multiplexing: bool, + + /// Has a prefetch phase been run? + /// + /// If so, we do not need to double-check any index files -- the prefetch stage already did. prefetched: bool, - downloads: Downloads, } impl<'cfg> HttpRegistry<'cfg> { From e29b21e123258f8106c7d2ded8da51abcb382f47 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:02:11 -0800 Subject: [PATCH 23/83] Boolean logic is hard --- src/cargo/sources/registry/http_remote.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 1d23db75ea3..a30d4db253e 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -523,7 +523,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn next_prefetched(&mut self) -> CargoResult> { - while !self.downloads.pending.is_empty() && !self.downloads.eager.is_empty() { + while !self.downloads.pending.is_empty() || self.downloads.eager.is_empty() { // We may already have packages that are ready to go. This takes care of grabbing the // next of those, while ensuring that we yield every distinct version requirement for // each package. 
From 202258ede1af97c39be3d9f2c25442bb1c3a3d75 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:03:25 -0800 Subject: [PATCH 24/83] Fix interactions between prefetching and loads --- src/cargo/sources/registry/http_remote.rs | 58 ++++++++++++++++------- tests/testsuite/http_registry.rs | 13 +---- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index a30d4db253e..28050df4c39 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -140,6 +140,11 @@ pub struct HttpRegistry<'cfg> { /// HTTP multi-handle for asynchronous/parallel requests during prefetching. prefetch: Multi, + /// Has the client requested a cache update? + /// + /// Only if they have do we double-check the freshness of each locally-stored index file. + requested_update: bool, + /// State for currently pending prefetch downloads. downloads: Downloads, @@ -150,6 +155,9 @@ pub struct HttpRegistry<'cfg> { /// /// If so, we do not need to double-check any index files -- the prefetch stage already did. prefetched: bool, + + /// If we are currently prefetching, all calls to RegistryData::load should go to disk. + is_prefetching: bool, } impl<'cfg> HttpRegistry<'cfg> { @@ -164,8 +172,10 @@ impl<'cfg> HttpRegistry<'cfg> { http: RefCell::new(None), prefetch: Multi::new(), multiplexing: false, - prefetched: false, downloads: Downloads::default(), + prefetched: false, + requested_update: false, + is_prefetching: false, } } @@ -250,9 +260,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { handle.get(true)?; handle.follow_location(true)?; - // TODO: explicitly enable HTTP2? 
- // https://github.com/rust-lang/cargo/blob/905134577c1955ad7865bcf4b31440d4bc882cde/src/cargo/core/package.rs#L651-L703 - // NOTE: lifted from src/cargo/core/package.rs // // This is an option to `libcurl` which indicates that if there's a @@ -293,8 +300,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // let's not flood crates.io with connections self.prefetch.set_max_host_connections(2)?; - self.prefetched = true; - + self.is_prefetching = true; Ok(true) } @@ -326,11 +332,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let pkg = root.join(path); let bytes; let was = if pkg.exists() { - if self.at.get().0.is_synchronized() { + if self.at.get().0.is_synchronized() || !self.requested_update { + debug!("not prefetching fresh {}", name); + // We already have this file locally, and we don't need to double-check it with - // upstream because we have a changelog, so there's really nothing to prefetch. - // We do keep track of the request though so that we will eventually yield this - // back to the caller who may then want to prefetch other transitive dependencies. + // upstream because we have a changelog, or because the client hasn't requested an + // index update. So there's really nothing to prefetch. We do keep track of the + // request though so that we will eventually yield this back to the caller who may + // then want to prefetch other transitive dependencies. if let Some(f) = self .downloads .eager @@ -600,8 +609,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { "index file downloaded with status code {}", handle.response_code()? ); - // TODO: How do we ensure that the next call to load doesn't _also_ send an HTTP - // request? Do we need to keep track of each fetched prefetched path or something? match code { 200 => { // We got data back, hooray! @@ -649,7 +656,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if self.downloads.pending.is_empty() { // We're all done! 
- return Ok(None); + break; } // We have no more replies to provide the caller with, @@ -662,6 +669,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .wait(&mut [], timeout) .chain_err(|| "failed to wait on curl `Multi`")?; } + + debug!("prefetched all transitive dependencies"); + self.is_prefetching = false; + self.prefetched = true; Ok(None) } @@ -714,8 +725,13 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!("index file is missing HTTP header header"); }; - // NOTE: We should always double-check for changes to config.json. - let double_check = !self.at.get().0.is_synchronized() || path.ends_with("config.json"); + let is_synchronized = self.at.get().0.is_synchronized(); + let is_fresh = + is_synchronized || !self.requested_update || self.prefetched || self.is_prefetching; + + // NOTE: We should double-check for changes to config.json even if synchronized. + let double_check = + !is_fresh || (self.requested_update && path.ends_with("config.json")); if double_check { if self.prefetched { @@ -731,11 +747,15 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } else { debug!("double-checking freshness of {}", path.display()); } - } else { - debug!( - "using {} from cache as changelog is synchronized", + } else if is_synchronized { + trace!( + "using local {} as changelog is synchronized", path.display() ); + } else if self.is_prefetching { + trace!("using local {} in load while prefetching", path.display()); + } else { + debug!("using local {} as it is fresh enough", path.display()); } // NOTE: If we're in offline mode, we don't double-check with the server. @@ -748,6 +768,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { Some((etag, last_modified, rest)) } } else { + assert!(!self.is_prefetching); None }; @@ -882,6 +903,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { debug!("updating the index"); // Make sure that subsequent loads double-check with the server again. 
+ self.requested_update = true; self.prefetched = false; self.prepare()?; diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 096e91aa982..864a027fbe0 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -440,25 +440,16 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { // Finally, build the first project again (with our newer Cargo.lock) which // should force an update of the old registry, download the new crate, and // then build everything again. - // - // However, if the server does not support a changelog, the index file will be double-checked - // with the backend when it is loaded, and will be updated at that time. There is no index - // update. - let updating = if matches!(config, RegistryServerConfiguration::NoChangelog) { - "" - } else { - "[UPDATING] [..]\n" - }; p.cargo("build") .with_stderr(format!( - "{u}\ + "\ +[UPDATING] [..] [DOWNLOADING] crates ... [DOWNLOADED] a v0.1.1 (http registry `{reg}`) [COMPILING] a v0.1.1 [COMPILING] foo v0.5.0 ([CWD]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s ", - u = updating, reg = url )) .run(); From acd3ee707e2c9351c70aacb088cd08c76973fcae Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:07:31 -0800 Subject: [PATCH 25/83] Give example crate for load source failure --- src/cargo/core/registry.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index 53775e25e4e..bb2c9e2d8b2 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -511,7 +511,10 @@ impl<'cfg> Registry for PackageRegistry<'cfg> { for (s, deps) in deps_per_source { // Ensure the requested source_id is loaded self.ensure_loaded(s, Kind::Normal).chain_err(|| { - anyhow::format_err!("failed to load source for dependency prefetching",) + anyhow::format_err!( + "failed to load source for dependency `{}` during prefetching", + 
deps[0].package_name() + ) })?; self.sources From b52740d4a84932c4035496fb21d580d15e28864f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:52:43 -0800 Subject: [PATCH 26/83] Make prefetch respect patches Since we now also have to collect the `deps` iterator, we have it implemented `ExactSizeIterator` to avoid excessive allocations. --- src/cargo/core/registry.rs | 84 +++++++++++++++++------------ src/cargo/core/source/mod.rs | 15 ++++-- src/cargo/sources/directory.rs | 5 +- src/cargo/sources/git/source.rs | 5 +- src/cargo/sources/path.rs | 5 +- src/cargo/sources/registry/index.rs | 2 +- src/cargo/sources/registry/mod.rs | 5 +- src/cargo/sources/replaced.rs | 5 +- 8 files changed, 84 insertions(+), 42 deletions(-) diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index bb2c9e2d8b2..dec538fa050 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -17,7 +17,10 @@ use url::Url; /// See also `core::Source`. pub trait Registry { /// Give source the opportunity to batch pre-fetch dependency information. - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()>; /// Attempt to find the packages that match a dependency request. fn query( @@ -486,42 +489,57 @@ https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html } impl<'cfg> Registry for PackageRegistry<'cfg> { - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { assert!(self.patches_locked); + let ndeps = deps.len(); + + // We need to partition deps so that we can prefetch dependencies from different + // sources. Note that we do not prefetch from overrides. + let mut deps_per_source = HashMap::with_capacity(ndeps); + for dep in deps { + // We need to check for patches, as they may tell us to look at a different source. 
+ // If they do, we want to make sure we don't access the original registry + // unnecessarily. + let mut patches = Vec::::new(); + if let Some(extra) = self.patches.get(dep.source_id().canonical_url()) { + patches.extend( + extra + .iter() + .filter(|s| dep.matches_ignoring_source(s.package_id())) + .cloned(), + ); + } + + let source_id = if patches.len() == 1 && dep.is_locked() { + // Perform the prefetch from the patched-in source instead. + patches.remove(0).source_id() + } else { + // The code in `fn query` accesses the original source here, so we do too. + dep.source_id() + }; + + deps_per_source + .entry(source_id) + .or_insert_with(|| Vec::with_capacity(ndeps)) + .push(dep); + } + + for (s, deps) in deps_per_source { + // Ensure the requested source_id is loaded + self.ensure_loaded(s, Kind::Normal).chain_err(|| { + anyhow::format_err!( + "failed to load source for dependency `{}` during prefetching", + deps[0].package_name() + ) + })?; - if self.sources.len() == 1 { - // Fast path -- there is only one source, so no need to partition by SourceId. self.sources - .sources_mut() - .next() + .get_mut(s) .unwrap() - .1 - .prefetch(deps)?; - } else { - // We need to partition deps so that we can prefetch dependencies from different - // sources. Note that we do not prefetch from overrides. 
- let mut deps_per_source = HashMap::new(); - for dep in deps { - deps_per_source - .entry(dep.source_id()) - .or_insert_with(Vec::new) - .push(dep); - } - - for (s, deps) in deps_per_source { - // Ensure the requested source_id is loaded - self.ensure_loaded(s, Kind::Normal).chain_err(|| { - anyhow::format_err!( - "failed to load source for dependency `{}` during prefetching", - deps[0].package_name() - ) - })?; - - self.sources - .get_mut(s) - .unwrap() - .prefetch(&mut deps.into_iter())?; - } + .prefetch(&mut deps.into_iter())?; } Ok(()) diff --git a/src/cargo/core/source/mod.rs b/src/cargo/core/source/mod.rs index 00a3980bedc..ef61567fdf6 100644 --- a/src/cargo/core/source/mod.rs +++ b/src/cargo/core/source/mod.rs @@ -29,7 +29,10 @@ pub trait Source { fn requires_precise(&self) -> bool; /// Give source the opportunity to batch pre-fetch dependency information. - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()>; /// Attempts to find the packages that match a dependency request. fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> CargoResult<()>; @@ -134,7 +137,10 @@ impl<'a, T: Source + ?Sized + 'a> Source for Box { } /// Forwards to `Source::prefetch`. 
- fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { (**self).prefetch(deps) } @@ -206,7 +212,10 @@ impl<'a, T: Source + ?Sized + 'a> Source for &'a mut T { (**self).requires_precise() } - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { (**self).prefetch(deps) } diff --git a/src/cargo/sources/directory.rs b/src/cargo/sources/directory.rs index 45939ca339e..7fcee08cbfd 100644 --- a/src/cargo/sources/directory.rs +++ b/src/cargo/sources/directory.rs @@ -43,7 +43,10 @@ impl<'cfg> Debug for DirectorySource<'cfg> { } impl<'cfg> Source for DirectorySource<'cfg> { - fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + _: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { Ok(()) } diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 417611c782e..13a12d84ba9 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -84,7 +84,10 @@ impl<'cfg> Debug for GitSource<'cfg> { } impl<'cfg> Source for GitSource<'cfg> { - fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + _: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { Ok(()) } diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index f898e04bcb6..6c6a2607cf5 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -470,7 +470,10 @@ impl<'cfg> Debug for PathSource<'cfg> { } impl<'cfg> Source for PathSource<'cfg> { - fn prefetch(&mut self, _: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + _: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { Ok(()) } diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 4201c1f1fc2..2735cf23438 100644 --- 
a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -459,7 +459,7 @@ impl<'cfg> RegistryIndex<'cfg> { pub fn prefetch( &mut self, - deps: &mut dyn Iterator>, + deps: &mut dyn ExactSizeIterator>, load: &mut dyn RegistryData, ) -> CargoResult<()> { // For some registry backends, it's expensive to fetch each individual index file, and the diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index b62174f1ccc..75afeefed20 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -599,7 +599,10 @@ impl<'cfg> RegistrySource<'cfg> { } impl<'cfg> Source for RegistrySource<'cfg> { - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { // TODO: conditional index update? self.index.prefetch(deps, &mut *self.ops)?; Ok(()) diff --git a/src/cargo/sources/replaced.rs b/src/cargo/sources/replaced.rs index 04b0920e038..efe2d5d3a11 100644 --- a/src/cargo/sources/replaced.rs +++ b/src/cargo/sources/replaced.rs @@ -40,7 +40,10 @@ impl<'cfg> Source for ReplacedSource<'cfg> { self.inner.requires_precise() } - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { + fn prefetch( + &mut self, + deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { let (replace_with, to_replace) = (self.replace_with, self.to_replace); self.inner .prefetch( From 5799fc8a1ae9d205c818222613bd8ebeb9bf841e Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 11:54:50 -0800 Subject: [PATCH 27/83] Fix up expected output in path test --- tests/testsuite/path.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testsuite/path.rs b/tests/testsuite/path.rs index d0f380156f2..81300ecf2a6 100644 --- a/tests/testsuite/path.rs +++ b/tests/testsuite/path.rs @@ -1044,12 +1044,12 @@ fn deep_path_error() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `c` 
as a dependency of package `b v0.1.0 [..]` +[ERROR] failed to prefetch dependencies of package `b v0.1.0 [..]` ... which is depended on by `a v0.1.0 [..]` ... which is depended on by `foo v0.1.0 [..]` Caused by: - failed to load source for dependency `c` + failed to load source for dependency `c` during prefetching Caused by: Unable to update [..]/foo/c From d0d4ccbd17d5ebf9958f261d107122d01c7b411c Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 12:10:12 -0800 Subject: [PATCH 28/83] Improve docs for prefetching methods/types --- src/cargo/sources/registry/index.rs | 11 ++++---- src/cargo/sources/registry/mod.rs | 41 +++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 2735cf23438..e495208d3a6 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -471,8 +471,7 @@ impl<'cfg> RegistryIndex<'cfg> { // // We have the advantage here of being able to play fast and loose with the exact // dependency requirements. It's fine if we fetch a bit too much, since the incremental - // cost of each index file is small. It's even fine if we fetch too few index files -- - // they'll just have to be fetched on the slow path later. + // cost of each index file is small. if self.config.offline() || !load.start_prefetch()? { // Backend does not support prefetching. return Ok(()); @@ -509,14 +508,14 @@ impl<'cfg> RegistryIndex<'cfg> { // Now, continuously iterate by walking dependencies we've loaded and fetching the index // entry for _their_ dependencies. while let Some(fetched) = load.next_prefetched()? 
{ - let summaries = if let Some(s) = self.summaries_cache.get_mut(&fetched.name) { + let summaries = if let Some(s) = self.summaries_cache.get_mut(&fetched.name()) { s } else { let summaries = Summaries::parse( index_version.as_deref(), root, &cache_root, - &fetched.path, + fetched.path(), self.source_id, load, self.config, @@ -524,7 +523,7 @@ impl<'cfg> RegistryIndex<'cfg> { let summaries = if let Some(s) = summaries { s } else { continue }; - match self.summaries_cache.entry(fetched.name) { + match self.summaries_cache.entry(fetched.name()) { Entry::Vacant(v) => v.insert(summaries), Entry::Occupied(mut o) => { let _ = o.insert(summaries); @@ -534,7 +533,7 @@ impl<'cfg> RegistryIndex<'cfg> { }; for (version, maybe_summary) in &mut summaries.versions { - if !fetched.req.matches(&version) { + if !fetched.version_req().matches(&version) { // The crate that pulled in this crate as a dependency did not care about this // particular version, so we don't need to walk its dependencies. // diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 75afeefed20..3643013fc6d 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -369,29 +369,60 @@ impl<'a> RegistryDependency<'a> { } } +/// An indicator that the prefetching for a given package has completed. +/// +/// To retrieve the index data for the package, use `Summaries::parse`. pub struct Fetched { name: InternedString, path: PathBuf, req: semver::VersionReq, } +impl Fetched { + pub fn name(&self) -> InternedString { + self.name + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub fn version_req(&self) -> &semver::VersionReq { + &self.req + } +} + pub trait RegistryData { fn prepare(&self) -> CargoResult<()>; fn index_path(&self) -> &Filesystem; + /// Initiate a prefetch phase. 
+ /// + /// During prefetch, a greedy dependency solver will walk the transitive dependency closure of + /// the package being built and call `prefetch` on each dependency. This allows an + /// implementation to pipeline the download of information for those dependencies, rather than + /// relying on synchronous calls to `load` later on. + /// + /// If this method returns `false` (the default), no prefetching happens. fn start_prefetch(&mut self) -> CargoResult { Ok(false) } - + - // Must over-approximate. + + /// Enqueue a prefetch of the given package. + /// + /// The package path, name, and dependency version requirements are passed back from + /// `next_prefetched` so that they can be used to inform future calls to `prefetch`. fn prefetch( &mut self, - _: &Path, - _: &Path, - _: InternedString, - _: &semver::VersionReq, + _root: &Path, + _path: &Path, + _name: InternedString, + _req: &semver::VersionReq, ) -> CargoResult<()> { Ok(()) } + + /// Dequeue the next available prefetched index file. fn next_prefetched(&mut self) -> CargoResult> { Ok(None) } From e970a0cdbcc968f81a36879978106a22014277cf Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 12:10:37 -0800 Subject: [PATCH 29/83] Update tests to match new error messages This is kind of unfortunate since it talks about prefetching even for sources that do not support it. The reason for this is that we have to load the source before we can even check whether it supports prefetching, and for these tests what fails is loading the source... 
--- src/cargo/core/registry.rs | 2 +- tests/testsuite/bad_config.rs | 8 ++++---- tests/testsuite/cargo_features.rs | 2 +- tests/testsuite/directory.rs | 2 +- tests/testsuite/git.rs | 4 ++-- tests/testsuite/git_auth.rs | 6 +++--- tests/testsuite/http_registry.rs | 2 +- tests/testsuite/local_registry.rs | 2 +- tests/testsuite/offline.rs | 2 +- tests/testsuite/path.rs | 2 +- tests/testsuite/registry.rs | 2 +- tests/testsuite/replace.rs | 6 +++--- tests/testsuite/workspaces.rs | 2 +- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index dec538fa050..01f3b8b050f 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -531,7 +531,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> { // Ensure the requested source_id is loaded self.ensure_loaded(s, Kind::Normal).chain_err(|| { anyhow::format_err!( - "failed to load source for dependency `{}` during prefetching", + "failed to load source for dependency `{}`", deps[0].package_name() ) })?; diff --git a/tests/testsuite/bad_config.rs b/tests/testsuite/bad_config.rs index a71d66c54c7..bf927f6cade 100644 --- a/tests/testsuite/bad_config.rs +++ b/tests/testsuite/bad_config.rs @@ -368,7 +368,7 @@ fn bad_git_dependency() { .with_stderr( "\ [UPDATING] git repository `file:///` -[ERROR] failed to get `foo` as a dependency of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `foo` @@ -934,7 +934,7 @@ fn bad_source_config2() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `bar` @@ -980,7 +980,7 @@ fn bad_source_config3() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies of 
package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `bar` @@ -1028,7 +1028,7 @@ fn bad_source_config4() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.0.0 ([..])` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/cargo_features.rs b/tests/testsuite/cargo_features.rs index 02a41c4fde6..dda7a5f9a54 100644 --- a/tests/testsuite/cargo_features.rs +++ b/tests/testsuite/cargo_features.rs @@ -199,7 +199,7 @@ fn nightly_feature_requires_nightly_in_dep() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `a` as a dependency of package `b v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies of package `b v0.0.1 ([..])` Caused by: failed to load source for dependency `a` diff --git a/tests/testsuite/directory.rs b/tests/testsuite/directory.rs index 85a5dd5842c..0b4d7032f92 100644 --- a/tests/testsuite/directory.rs +++ b/tests/testsuite/directory.rs @@ -653,7 +653,7 @@ fn git_override_requires_lockfile() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `git` as a dependency of package `foo v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` Caused by: failed to load source for dependency `git` diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 548d7264c2a..fce51d32ca1 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -938,7 +938,7 @@ fn dep_with_bad_submodule() { "\ [UPDATING] git repository [..] 
[UPDATING] git submodule `file://[..]/dep2` -[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` Caused by: failed to load source for dependency `dep1` @@ -2362,7 +2362,7 @@ fn invalid_git_dependency_manifest() { .with_stderr(&format!( "\ [UPDATING] git repository `{}` -[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.5.0 ([..])` Caused by: failed to load source for dependency `dep1` diff --git a/tests/testsuite/git_auth.rs b/tests/testsuite/git_auth.rs index 85702290af7..4a8c60a8988 100644 --- a/tests/testsuite/git_auth.rs +++ b/tests/testsuite/git_auth.rs @@ -137,7 +137,7 @@ fn http_auth_offered() { .with_stderr_contains(&format!( "\ [UPDATING] git repository `http://{addr}/foo/bar` -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 [..]` Caused by: failed to load source for dependency `bar` @@ -299,7 +299,7 @@ fn net_err_suggests_fetch_with_cli() { [UPDATING] git repository `ssh://needs-proxy.invalid/git` warning: spurious network error[..] warning: spurious network error[..] 
-[ERROR] failed to get `foo` as a dependency of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `foo` @@ -368,7 +368,7 @@ fn instead_of_url_printed() { .with_stderr(&format!( "\ [UPDATING] git repository `https://foo.bar/foo/bar` -[ERROR] failed to get `bar` as a dependency of package `foo [..]` +[ERROR] failed to prefetch dependencies of package `foo [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 864a027fbe0..8c3684aade6 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -754,7 +754,7 @@ fn disallow_network(config: RegistryServerConfiguration) { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `foo` as a dependency of package `bar v0.5.0 [..]` +[ERROR] failed to prefetch dependencies of package `bar v0.5.0 [..]` Caused by: failed to load source for dependency `foo` diff --git a/tests/testsuite/local_registry.rs b/tests/testsuite/local_registry.rs index 485ec89dcb9..522852b0189 100644 --- a/tests/testsuite/local_registry.rs +++ b/tests/testsuite/local_registry.rs @@ -359,7 +359,7 @@ fn invalid_dir_bad() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/offline.rs b/tests/testsuite/offline.rs index a5505cff781..bd5bbe25e83 100644 --- a/tests/testsuite/offline.rs +++ b/tests/testsuite/offline.rs @@ -270,7 +270,7 @@ fn cargo_compile_forbird_git_httpsrepo_offline() { .build(); p.cargo("build --offline").with_status(101).with_stderr("\ -[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` Caused by: failed to load source for 
dependency `dep1` diff --git a/tests/testsuite/path.rs b/tests/testsuite/path.rs index 81300ecf2a6..38495d148bb 100644 --- a/tests/testsuite/path.rs +++ b/tests/testsuite/path.rs @@ -511,7 +511,7 @@ fn error_message_for_missing_manifest() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `bar` as a dependency of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/registry.rs b/tests/testsuite/registry.rs index acba4a2c413..3fb4e76f831 100644 --- a/tests/testsuite/registry.rs +++ b/tests/testsuite/registry.rs @@ -1562,7 +1562,7 @@ fn disallow_network() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `foo` as a dependency of package `bar v0.5.0 [..]` +[ERROR] failed to prefetch dependencies of package `bar v0.5.0 [..]` Caused by: failed to load source for dependency `foo` diff --git a/tests/testsuite/replace.rs b/tests/testsuite/replace.rs index ad535eb13f9..ee9d6d28f07 100644 --- a/tests/testsuite/replace.rs +++ b/tests/testsuite/replace.rs @@ -544,7 +544,7 @@ fn override_wrong_name() { "\ [UPDATING] [..] index [UPDATING] git repository [..] -[ERROR] failed to get `baz` as a dependency of package `foo v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` Caused by: no matching package for override `[..]baz:0.1.0` found @@ -591,7 +591,7 @@ fn override_with_nothing() { "\ [UPDATING] [..] index [UPDATING] git repository [..] -[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` Caused by: failed to load source for dependency `bar` @@ -677,7 +677,7 @@ fn multiple_specs() { "\ [UPDATING] [..] index [UPDATING] git repository [..] 
-[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` Caused by: overlapping replacement specifications found: diff --git a/tests/testsuite/workspaces.rs b/tests/testsuite/workspaces.rs index a20ef11ba70..355a8ee5306 100644 --- a/tests/testsuite/workspaces.rs +++ b/tests/testsuite/workspaces.rs @@ -2302,7 +2302,7 @@ fn invalid_missing() { .with_status(101) .with_stderr( "\ -[ERROR] failed to get `x` as a dependency of package `foo v0.1.0 [..]` +[ERROR] failed to prefetch dependencies of package `foo v0.1.0 [..]` Caused by: failed to load source for dependency `x` From e1018d0651ac0e129fa8685c6c8b3d7bed3e586a Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 12:17:32 -0800 Subject: [PATCH 30/83] Fix overeager test fixing --- tests/testsuite/path.rs | 2 +- tests/testsuite/replace.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testsuite/path.rs b/tests/testsuite/path.rs index 38495d148bb..85b1c33c33d 100644 --- a/tests/testsuite/path.rs +++ b/tests/testsuite/path.rs @@ -1049,7 +1049,7 @@ fn deep_path_error() { ... which is depended on by `foo v0.1.0 [..]` Caused by: - failed to load source for dependency `c` during prefetching + failed to load source for dependency `c` Caused by: Unable to update [..]/foo/c diff --git a/tests/testsuite/replace.rs b/tests/testsuite/replace.rs index ee9d6d28f07..ad535eb13f9 100644 --- a/tests/testsuite/replace.rs +++ b/tests/testsuite/replace.rs @@ -544,7 +544,7 @@ fn override_wrong_name() { "\ [UPDATING] [..] index [UPDATING] git repository [..] -[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` +[ERROR] failed to get `baz` as a dependency of package `foo v0.0.1 ([..])` Caused by: no matching package for override `[..]baz:0.1.0` found @@ -591,7 +591,7 @@ fn override_with_nothing() { "\ [UPDATING] [..] index [UPDATING] git repository [..] 
-[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` Caused by: failed to load source for dependency `bar` @@ -677,7 +677,7 @@ fn multiple_specs() { "\ [UPDATING] [..] index [UPDATING] git repository [..] -[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` Caused by: overlapping replacement specifications found: From 8ccaf4f4d2c423b3ea477243098a8fbbb3862466 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 16:09:04 -0800 Subject: [PATCH 31/83] Fix resolver-tests crate compile failure --- crates/resolver-tests/src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/resolver-tests/src/lib.rs b/crates/resolver-tests/src/lib.rs index b32dfc04330..d2316035b0d 100644 --- a/crates/resolver-tests/src/lib.rs +++ b/crates/resolver-tests/src/lib.rs @@ -1,6 +1,7 @@ #![allow(clippy::many_single_char_names)] #![allow(clippy::needless_range_loop)] // false positives +use std::borrow::Cow; use std::cell::RefCell; use std::cmp::PartialEq; use std::cmp::{max, min}; @@ -125,6 +126,14 @@ pub fn resolve_with_config_raw( used: HashSet, }; impl<'a> Registry for MyRegistry<'a> { + fn prefetch( + &mut self, + _deps: &mut dyn ExactSizeIterator>, + ) -> CargoResult<()> { + // Doing nothing is a valid way to prefetch. + Ok(()) + } + fn query( &mut self, dep: &Dependency, From 9ad0ff67e084f1a7fa745e75273f84c350eb33ae Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 17:52:07 -0800 Subject: [PATCH 32/83] Avoid UncanonicalizedIter during prefetch It's not really necessary. It's okay if it takes us slightly longer to generate an error. 
--- src/cargo/sources/registry/index.rs | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index e495208d3a6..d90d800cb13 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -112,6 +112,9 @@ impl<'s> Iterator for UncanonicalizedIter<'s> { return None; } + // TODO: + // This implementation can currently generate paths like en/v-/env_logger, + // which doesn't _seem_ like a useful candidate to test? let ret = Some( self.input .chars() @@ -494,15 +497,16 @@ impl<'cfg> RegistryIndex<'cfg> { // Seed the prefetching with the root dependencies. for dep in deps { - let raw_path = relative(&*dep.package_name()); - for relative in UncanonicalizedIter::new(&raw_path).take(1024) { - load.prefetch( - root, - &Path::new(&relative), - dep.package_name(), - dep.version_req(), - )?; - } + let relative = relative(&*dep.package_name()); + // NOTE: We do not use UncanonicalizedIter here or below because if the user gave a + // misspelling, it's fine if we don't prefetch their misspelling. The resolver will be + // a bit slower, but then give them an error. + load.prefetch( + root, + &Path::new(&relative), + dep.package_name(), + dep.version_req(), + )?; } // Now, continuously iterate by walking dependencies we've loaded and fetching the index @@ -567,17 +571,13 @@ impl<'cfg> RegistryIndex<'cfg> { continue; } - let raw_path = relative(&*dep.package_name()); - for relative in UncanonicalizedIter::new(&raw_path).take(1024) { - // NOTE: Many of these prefetches will "miss", but that's okay. - // They're going to be pipelined anyway. 
- load.prefetch( - root, - Path::new(&relative), - dep.package_name(), - dep.version_req(), - )?; - } + let relative = relative(&*dep.package_name()); + load.prefetch( + root, + Path::new(&relative), + dep.package_name(), + dep.version_req(), + )?; } } } From 6b1bd18ec91b69777bbe09abb6673f4a6e0c0ddc Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 17:53:00 -0800 Subject: [PATCH 33/83] Keep better track of what has been downloaded --- src/cargo/sources/registry/http_remote.rs | 160 +++++++++++++--------- 1 file changed, 95 insertions(+), 65 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 28050df4c39..5ccfe1729a7 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -18,7 +18,7 @@ use curl::easy::{Easy, HttpVersion, List}; use curl::multi::{EasyHandle, Multi}; use log::{debug, trace, warn}; use std::cell::{Cell, RefCell, RefMut}; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Write as FmtWrite; use std::fs::{self, File, OpenOptions}; use std::io::prelude::*; @@ -151,10 +151,10 @@ pub struct HttpRegistry<'cfg> { /// Does the config say that we can use HTTP multiplexing? multiplexing: bool, - /// Has a prefetch phase been run? + /// What paths have we already prefetched? /// - /// If so, we do not need to double-check any index files -- the prefetch stage already did. - prefetched: bool, + /// We do not need to double-check any of these index files -- the prefetch stage already did. + prefetched: HashSet, /// If we are currently prefetching, all calls to RegistryData::load should go to disk. 
is_prefetching: bool, @@ -173,7 +173,7 @@ impl<'cfg> HttpRegistry<'cfg> { prefetch: Multi::new(), multiplexing: false, downloads: Downloads::default(), - prefetched: false, + prefetched: HashSet::new(), requested_update: false, is_prefetching: false, } @@ -332,32 +332,35 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let pkg = root.join(path); let bytes; let was = if pkg.exists() { - if self.at.get().0.is_synchronized() || !self.requested_update { - debug!("not prefetching fresh {}", name); + if self.at.get().0.is_synchronized() + || !self.requested_update + || self.prefetched.contains(path) + { + trace!("not prefetching fresh {}", name); // We already have this file locally, and we don't need to double-check it with // upstream because we have a changelog, or because the client hasn't requested an // index update. So there's really nothing to prefetch. We do keep track of the // request though so that we will eventually yield this back to the caller who may // then want to prefetch other transitive dependencies. 
- if let Some(f) = self - .downloads - .eager - .iter_mut() - .find(|f| f.primary.path == path) - { - if &f.primary.req != req { - f.others.insert(req.clone()); + use std::collections::btree_map::Entry; + match self.downloads.eager.entry(path.to_path_buf()) { + Entry::Occupied(mut o) => { + let o = o.get_mut(); + if &o.primary.req != req { + o.others.insert(req.clone()); + } + } + Entry::Vacant(v) => { + v.insert(MultiVersionFetched { + primary: Fetched { + path: path.to_path_buf(), + name, + req: req.clone(), + }, + others: HashSet::new(), + }); } - } else { - self.downloads.eager.push(MultiVersionFetched { - primary: Fetched { - path: path.to_path_buf(), - name, - req: req.clone(), - }, - others: HashSet::new(), - }); } return Ok(()); } @@ -380,7 +383,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; assert!(!self.config.offline()); - debug!("double-checking freshness of {}", path.display()); let etag = std::str::from_utf8(etag)?; let last_modified = std::str::from_utf8(last_modified)?; @@ -401,18 +403,17 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { dl.additional_reqs.insert(req.clone()); } return Ok(()); - } else if let Some(f) = self - .downloads - .eager - .iter_mut() - .find(|f| f.primary.path == path) - { + } else if let Some(f) = self.downloads.eager.get_mut(path) { if &f.primary.req != req { f.others.insert(req.clone()); } return Ok(()); } + if was.is_some() { + debug!("double-checking freshness of {}", path.display()); + } + // Looks like we're going to have to bite the bullet and do a network request. let url = self.source_id.url(); self.prepare()?; @@ -474,7 +475,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // That thread-local is set up in `next_prefetched` when it calls self.prefetch.perform, // which is what ultimately calls this method. 
handle.write_function(move |buf| { - debug!("{} - {} bytes of data", token, buf.len()); + trace!("{} - {} bytes of data", token, buf.len()); tls::with(|downloads| { if let Some(downloads) = downloads { downloads.pending[&token] @@ -536,17 +537,27 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // We may already have packages that are ready to go. This takes care of grabbing the // next of those, while ensuring that we yield every distinct version requirement for // each package. - if let Some(mut fetched) = self.downloads.eager.pop() { - return if let Some(req) = fetched.others.iter().next().cloned() { + // + // TODO: Use the nightly BTreeMap::pop_first when stable. + if let Some(path) = self.downloads.eager.keys().next().cloned() { + use std::collections::btree_map::Entry; + let mut fetched = if let Entry::Occupied(o) = self.downloads.eager.entry(path) { + o + } else { + unreachable!(); + }; + + return if let Some(req) = fetched.get().others.iter().next().cloned() { + let fetched = fetched.get_mut(); fetched.others.remove(&req); let ret = Ok(Some(Fetched { path: fetched.primary.path.clone(), name: fetched.primary.name, req, })); - self.downloads.eager.push(fetched); ret } else { + let fetched = fetched.remove(); Ok(Some(fetched.primary)) }; } @@ -564,7 +575,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .perform() .chain_err(|| "failed to perform http requests") })?; - debug!("handles remaining: {}", remaining_in_multi); + trace!("handles remaining: {}", remaining_in_multi); // Walk all the messages cURL came across in case anything completed. 
let results = &mut self.downloads.results; @@ -583,7 +594,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // // This will ultimately add more replies to self.downloads.eager, which we'll while let Some((token, result)) = results.pop() { - debug!("{} finished with {:?}", token, result); + trace!("{} finished with {:?}", token, result); let (dl, handle) = self .downloads @@ -603,10 +614,15 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }, others: dl.additional_reqs, }; + assert!( + self.prefetched.insert(fetched.primary.path.clone()), + "downloaded the same path twice during prefetching" + ); let code = handle.response_code()?; debug!( - "index file downloaded with status code {}", + "index file for {} downloaded with status code {}", + fetched.primary.name, handle.response_code()? ); match code { @@ -628,13 +644,25 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { file.write_all(&data)?; file.flush()?; - self.downloads.eager.push(fetched); + assert!( + self.downloads + .eager + .insert(fetched.primary.path.clone(), fetched) + .is_none(), + "download finished for already-finished path" + ); } 304 => { // Not Modified response. // There's nothing for us to do -- the index file is up to date. // The only thing that matters is telling the caller about this package. - self.downloads.eager.push(fetched); + assert!( + self.downloads + .eager + .insert(fetched.primary.path.clone(), fetched) + .is_none(), + "download finished for already-finished path" + ); } 404 => { // Not Found response. 
@@ -672,7 +700,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { debug!("prefetched all transitive dependencies"); self.is_prefetching = false; - self.prefetched = true; Ok(None) } @@ -726,40 +753,42 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; let is_synchronized = self.at.get().0.is_synchronized(); - let is_fresh = - is_synchronized || !self.requested_update || self.prefetched || self.is_prefetching; - // NOTE: We should double-check for changes to config.json even if synchronized. - let double_check = - !is_fresh || (self.requested_update && path.ends_with("config.json")); - - if double_check { - if self.prefetched { + let is_fresh = if is_synchronized { + if self.requested_update && path.ends_with("config.json") { + debug!("double-checking freshness of {} on update", path.display()); + false + } else { trace!( - "not double-checking freshness of {} after prefetch", - path.display() - ); - } else if self.config.offline() { - debug!( - "not double-checking freshness of {} due to offline", + "using local {} as changelog is synchronized", path.display() ); - } else { - debug!("double-checking freshness of {}", path.display()); + true } - } else if is_synchronized { + } else if !self.requested_update { trace!( - "using local {} as changelog is synchronized", + "using local {} as user did not request update", path.display() ); + true + } else if self.config.offline() { + trace!("using local {} in offline mode", path.display()); + true } else if self.is_prefetching { trace!("using local {} in load while prefetching", path.display()); + true + } else if self.prefetched.contains(path) { + trace!( + "using local {} as it was already prefetched", + path.display() + ); + true } else { - debug!("using local {} as it is fresh enough", path.display()); - } + debug!("double-checking freshness of {}", path.display()); + false + }; - // NOTE: If we're in offline mode, we don't double-check with the server. 
- if !double_check || self.config.offline() { + if is_fresh { return data(rest); } else { // We cannot trust the index files and need to double-check with server. @@ -904,7 +933,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Make sure that subsequent loads double-check with the server again. self.requested_update = true; - self.prefetched = false; + self.prefetched.clear(); self.prepare()?; let path = self.config.assert_package_cache_locked(&self.index_path); @@ -1496,7 +1525,8 @@ pub struct Downloads { /// handle one at a time. results: Vec<(usize, Result<(), curl::Error>)>, /// Prefetch requests that we already have a response to. - eager: Vec, + /// NOTE: Should this maybe be some kind of heap? + eager: BTreeMap, /// The next ID to use for creating a token (see `Download::token`). next: usize, } From 5d092de5942c8a10a15fc93f64c37289f9eab2cb Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 25 Nov 2020 18:09:40 -0800 Subject: [PATCH 34/83] Don't yield the same package many times Note that this could be a decent amount nicer with https://github.com/steveklabnik/semver/issues/170 This currently ends up in some kind of loop for cargo-like deps: yielding already-fetched rustversion yielding already-fetched futures yielding already-fetched thiserror yielding already-fetched anyhow yielding already-fetched rustversion yielding already-fetched futures yielding already-fetched thiserror Everything _up_ to that point terminates pretty quickly. 
--- src/cargo/sources/registry/http_remote.rs | 90 ++++++++--------------- src/cargo/sources/registry/index.rs | 2 +- src/cargo/sources/registry/mod.rs | 7 +- 3 files changed, 34 insertions(+), 65 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 5ccfe1729a7..dfc6b7ae120 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -346,19 +346,18 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { use std::collections::btree_map::Entry; match self.downloads.eager.entry(path.to_path_buf()) { Entry::Occupied(mut o) => { - let o = o.get_mut(); - if &o.primary.req != req { - o.others.insert(req.clone()); - } + o.get_mut().reqs.insert(req.clone()); } Entry::Vacant(v) => { - v.insert(MultiVersionFetched { - primary: Fetched { - path: path.to_path_buf(), - name, - req: req.clone(), - }, - others: HashSet::new(), + if self.prefetched.contains(path) { + debug!("yielding already-prefetched {}", name); + } + let mut reqs = HashSet::new(); + reqs.insert(req.clone()); + v.insert(Fetched { + path: path.to_path_buf(), + name, + reqs, }); } } @@ -399,14 +398,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .pending .get_mut(token) .expect("invalid token"); - if &dl.req != req { - dl.additional_reqs.insert(req.clone()); - } + dl.reqs.insert(req.clone()); return Ok(()); } else if let Some(f) = self.downloads.eager.get_mut(path) { - if &f.primary.req != req { - f.others.insert(req.clone()); - } + f.reqs.insert(req.clone()); return Ok(()); } @@ -459,13 +454,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { None, "path queued for download more than once" ); + let mut reqs = HashSet::new(); + reqs.insert(req.clone()); let dl = Download { token, data: RefCell::new(Vec::new()), path: path.to_path_buf(), name, - req: req.clone(), - additional_reqs: HashSet::new(), + reqs, etag: RefCell::new(None), last_modified: RefCell::new(None), }; @@ -540,26 +536,8 @@ impl<'cfg> 
RegistryData for HttpRegistry<'cfg> { // // TODO: Use the nightly BTreeMap::pop_first when stable. if let Some(path) = self.downloads.eager.keys().next().cloned() { - use std::collections::btree_map::Entry; - let mut fetched = if let Entry::Occupied(o) = self.downloads.eager.entry(path) { - o - } else { - unreachable!(); - }; - - return if let Some(req) = fetched.get().others.iter().next().cloned() { - let fetched = fetched.get_mut(); - fetched.others.remove(&req); - let ret = Ok(Some(Fetched { - path: fetched.primary.path.clone(), - name: fetched.primary.name, - req, - })); - ret - } else { - let fetched = fetched.remove(); - Ok(Some(fetched.primary)) - }; + let fetched = self.downloads.eager.remove(&path).unwrap(); + return Ok(Some(fetched)); } // We don't have any fetched results immediately ready to be yielded, @@ -606,23 +584,20 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let mut handle = self.prefetch.remove(handle)?; self.downloads.pending_ids.remove(&dl.path); - let fetched = MultiVersionFetched { - primary: Fetched { - path: dl.path, - name: dl.name, - req: dl.req, - }, - others: dl.additional_reqs, + let fetched = Fetched { + path: dl.path, + name: dl.name, + reqs: dl.reqs, }; assert!( - self.prefetched.insert(fetched.primary.path.clone()), + self.prefetched.insert(fetched.path.clone()), "downloaded the same path twice during prefetching" ); let code = handle.response_code()?; debug!( "index file for {} downloaded with status code {}", - fetched.primary.name, + fetched.name, handle.response_code()? ); match code { @@ -630,7 +605,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // We got data back, hooray! // Let's update the index file. 
let path = self.config.assert_package_cache_locked(&self.index_path); - let pkg = path.join(&fetched.primary.path); + let pkg = path.join(&fetched.path); paths::create_dir_all(pkg.parent().expect("pkg is a file"))?; let mut file = paths::create(pkg)?; file.write_all(dl.etag.into_inner().as_deref().unwrap_or("\n").as_bytes())?; @@ -647,7 +622,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { assert!( self.downloads .eager - .insert(fetched.primary.path.clone(), fetched) + .insert(fetched.path.clone(), fetched) .is_none(), "download finished for already-finished path" ); @@ -659,7 +634,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { assert!( self.downloads .eager - .insert(fetched.primary.path.clone(), fetched) + .insert(fetched.path.clone(), fetched) .is_none(), "download finished for already-finished path" ); @@ -1502,11 +1477,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } -struct MultiVersionFetched { - primary: Fetched, - others: HashSet, -} - // NOTE: what follows is lifted from src/cargo/core/package.rs and tweaked /// Helper for downloading crates. @@ -1526,7 +1496,7 @@ pub struct Downloads { results: Vec<(usize, Result<(), curl::Error>)>, /// Prefetch requests that we already have a response to. /// NOTE: Should this maybe be some kind of heap? - eager: BTreeMap, + eager: BTreeMap, /// The next ID to use for creating a token (see `Download::token`). next: usize, } @@ -1543,10 +1513,8 @@ struct Download { name: InternedString, /// The version requirements for the dependency line that triggered this fetch. - req: semver::VersionReq, - - /// Additional version requirements for same package. - additional_reqs: HashSet, + // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary + reqs: HashSet, /// Actual downloaded data, updated throughout the lifetime of this download. 
data: RefCell>, diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index d90d800cb13..8fdd380da1d 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -537,7 +537,7 @@ impl<'cfg> RegistryIndex<'cfg> { }; for (version, maybe_summary) in &mut summaries.versions { - if !fetched.version_req().matches(&version) { + if !fetched.version_reqs().any(|vr| vr.matches(&version)) { // The crate that pulled in this crate as a dependency did not care about this // particular version, so we don't need to walk its dependencies. // diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 3643013fc6d..2edaf295ad2 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -375,7 +375,8 @@ impl<'a> RegistryDependency<'a> { pub struct Fetched { name: InternedString, path: PathBuf, - req: semver::VersionReq, + // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary + reqs: HashSet, } impl Fetched { @@ -387,8 +388,8 @@ impl Fetched { &self.path } - pub fn version_req(&self) -> &semver::VersionReq { - &self.req + pub fn version_reqs(&self) -> impl Iterator { + self.reqs.iter() } } From 0e2ead6f163221c79558b09f99a890c68e15e1bf Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 11:46:16 -0800 Subject: [PATCH 35/83] New time format --- src/cargo/sources/registry/http_remote.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index dfc6b7ae120..8a08ef76197 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -950,8 +950,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { ChangelogStrategy::FirstFetch { full: false } => { // We really just need the epoch number and file size, // which we can get at by fetching just the first line. 
- // "1 2019-10-18 23:51:23 ".len() == 22 - handle.range("0-22")?; + // "1 2019-10-18T23:51:23Z ".len() == 23 + handle.range("0-23")?; } ChangelogStrategy::FirstFetch { full: _ } => {} } @@ -1096,7 +1096,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } // We must assume we're in case 1. (ChangelogStrategy::FirstFetch { full }, _) => { - // Our request for just the start of the changelog (Range: 0-22) failed. + // Our request for just the start of the changelog (Range: 0-23) failed. // This probably means that the changelog is empty, but we do a full fetch // to make sure. assert!(!full); @@ -1150,7 +1150,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // epoch number! Should that happen, we need to detect it. // // Lines _should_ look like this: - // 1 2019-10-18 23:52:00 anyhow + // 1 2019-10-18T23:52:00Z anyhow // // That is: epoch date time crate. let mut parts = line.trim().split_whitespace(); From f11440e3bca9b2ca0d76c38ffaeb876049ae2721 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 11:46:26 -0800 Subject: [PATCH 36/83] Avoid walking in circles --- src/cargo/sources/registry/index.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 8fdd380da1d..f47d1583bfe 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -495,8 +495,17 @@ impl<'cfg> RegistryIndex<'cfg> { prefix }; + // Since we allow dependency cycles in crates, we may end up walking in circles forever if + // we just iteratively handled each candidate as we discovered it. The real resolver is + // smart about how it avoids walking endlessly in cycles, but in this simple greedy + // resolver we play fast-and-loose, and instead just keep track of dependencies we have + // already looked at and just don't walk them again. + let mut walked = HashSet::new(); + // Seed the prefetching with the root dependencies. 
for dep in deps { + walked.insert((dep.package_name(), dep.version_req().clone())); + let relative = relative(&*dep.package_name()); // NOTE: We do not use UncanonicalizedIter here or below because if the user gave a // misspelling, it's fine if we don't prefetch their misspelling. The resolver will be @@ -571,6 +580,11 @@ impl<'cfg> RegistryIndex<'cfg> { continue; } + if !walked.insert((dep.package_name(), dep.version_req().clone())) { + // We've already walked this dependency -- no need to do so again. + continue; + } + let relative = relative(&*dep.package_name()); load.prefetch( root, From e1f96cc3763964a387606011345754470e8e0d78 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 15:05:09 -0800 Subject: [PATCH 37/83] Only allow HTTP registry under -Z http-registry --- src/cargo/core/features.rs | 2 ++ src/cargo/sources/config.rs | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/src/cargo/core/features.rs b/src/cargo/core/features.rs index ec179eb6bd2..cb5df4d9ba3 100644 --- a/src/cargo/core/features.rs +++ b/src/cargo/core/features.rs @@ -360,6 +360,7 @@ pub struct CliUnstable { pub namespaced_features: bool, pub weak_dep_features: bool, pub extra_link_arg: bool, + pub http_registry: bool, } fn deserialize_build_std<'de, D>(deserializer: D) -> Result>, D::Error> @@ -468,6 +469,7 @@ impl CliUnstable { "namespaced-features" => self.namespaced_features = parse_empty(k, v)?, "weak-dep-features" => self.weak_dep_features = parse_empty(k, v)?, "extra-link-arg" => self.extra_link_arg = parse_empty(k, v)?, + "http-registry" => self.http_registry = parse_empty(k, v)?, _ => bail!("unknown `-Z` flag specified: {}", k), } diff --git a/src/cargo/sources/config.rs b/src/cargo/sources/config.rs index ce7ef6c58da..1abb9f44d9d 100644 --- a/src/cargo/sources/config.rs +++ b/src/cargo/sources/config.rs @@ -207,7 +207,12 @@ restore the source replacement configuration to continue the build let mut srcs = Vec::new(); if let Some(registry) = def.registry { let 
url = url(®istry, &format!("source.{}.registry", name))?; + if url.scheme().starts_with("rfc+") { + if !self.config.cli_unstable().http_registry { + bail!("Usage of HTTP-based registries requires `-Z http-registry`") + } + // NOTE: it is illegal to use set_scheme to change rfc+http(s) to http(s). let url = url .to_string() From 5345b833d9fa41bf5805dafb830d868e91441401 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 15:07:17 -0800 Subject: [PATCH 38/83] Add prefetch progress tracking It's not _super_ helpful at the moment since the number keeps fluctuating... --- src/cargo/sources/registry/http_remote.rs | 265 +++++++++++++++++----- 1 file changed, 202 insertions(+), 63 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 8a08ef76197..cb2f288d0b5 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -13,7 +13,8 @@ use crate::sources::registry::{ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; -use crate::util::{Config, Filesystem, Sha256}; +use crate::util::{self, Config, Filesystem, Progress, ProgressStyle, Sha256}; +use bytesize::ByteSize; use curl::easy::{Easy, HttpVersion, List}; use curl::multi::{EasyHandle, Multi}; use log::{debug, trace, warn}; @@ -26,6 +27,7 @@ use std::io::SeekFrom; use std::path::{Path, PathBuf}; use std::str; use std::time::Duration; +use std::time::Instant; const ETAG: &'static [u8] = b"ETag"; const LAST_MODIFIED: &'static [u8] = b"Last-Modified"; @@ -146,7 +148,7 @@ pub struct HttpRegistry<'cfg> { requested_update: bool, /// State for currently pending prefetch downloads. - downloads: Downloads, + downloads: Downloads<'cfg>, /// Does the config say that we can use HTTP multiplexing? 
multiplexing: bool, @@ -160,6 +162,67 @@ pub struct HttpRegistry<'cfg> { is_prefetching: bool, } +// NOTE: the download bits are lifted from src/cargo/core/package.rs and tweaked + +/// Helper for downloading crates. +pub struct Downloads<'cfg> { + config: &'cfg Config, + /// When a download is started, it is added to this map. The key is a + /// "token" (see `Download::token`). It is removed once the download is + /// finished. + pending: HashMap, + /// Set of paths currently being downloaded, mapped to their tokens. + /// This should stay in sync with `pending`. + pending_ids: HashMap, + /// The final result of each download. A pair `(token, result)`. This is a + /// temporary holding area, needed because curl can report multiple + /// downloads at once, but the main loop (`wait`) is written to only + /// handle one at a time. + results: Vec<(usize, Result<(), curl::Error>)>, + /// Prefetch requests that we already have a response to. + /// NOTE: Should this maybe be some kind of heap? + eager: BTreeMap, + /// The next ID to use for creating a token (see `Download::token`). + next: usize, + /// Progress bar. + progress: RefCell>>, + /// Number of downloads that have successfully finished. + downloads_finished: usize, + /// Total bytes for all successfully downloaded index files. + downloaded_bytes: u64, + /// Time when downloading started. + start: Instant, + /// Indicates *all* downloads were successful. + success: bool, +} + +struct Download { + /// The token for this download, used as the key of the `Downloads::pending` map + /// and stored in `EasyHandle` as well. + token: usize, + + /// The path of the package that we're downloading. + path: PathBuf, + + /// The name of the package that we're downloading. + name: InternedString, + + /// The version requirements for the dependency line that triggered this fetch. 
+ // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary + reqs: HashSet, + + /// Actual downloaded data, updated throughout the lifetime of this download. + data: RefCell>, + + /// ETag and Last-Modified headers received from the server (if any). + etag: RefCell>, + last_modified: RefCell>, + + /// Statistics updated from the progress callback in libcurl. + total: Cell, + current: Cell, +} + impl<'cfg> HttpRegistry<'cfg> { pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> HttpRegistry<'cfg> { HttpRegistry { @@ -172,7 +235,23 @@ impl<'cfg> HttpRegistry<'cfg> { http: RefCell::new(None), prefetch: Multi::new(), multiplexing: false, - downloads: Downloads::default(), + downloads: Downloads { + start: Instant::now(), + config, + next: 0, + pending: HashMap::new(), + pending_ids: HashMap::new(), + eager: BTreeMap::new(), + results: Vec::new(), + progress: RefCell::new(Some(Progress::with_style( + "Prefetching", + ProgressStyle::Ratio, + config, + ))), + downloads_finished: 0, + downloaded_bytes: 0, + success: false, + }, prefetched: HashSet::new(), requested_update: false, is_prefetching: false, @@ -456,15 +535,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { ); let mut reqs = HashSet::new(); reqs.insert(req.clone()); - let dl = Download { - token, - data: RefCell::new(Vec::new()), - path: path.to_path_buf(), - name, - reqs, - etag: RefCell::new(None), - last_modified: RefCell::new(None), - }; // Each write should go to self.downloads.pending[&token].data. // Since the write function must be 'static, we access downloads through a thread-local. @@ -484,7 +554,16 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { Ok(buf.len()) })?; - // Same goes for the header function -- it goes through thread-local storage. + // Same goes for the progress function -- it goes through thread-local storage. 
+ handle.progress(true)?; + handle.progress_function(move |dl_total, dl_cur, _, _| { + tls::with(|downloads| match downloads { + Some(d) => d.progress(token, dl_total as u64, dl_cur as u64), + None => false, + }) + })?; + + // And ditto for the header function. handle.header_function(move |buf| { if let Some((tag, value)) = Self::handle_http_header(buf) { let is_etag = buf.eq_ignore_ascii_case(ETAG); @@ -518,12 +597,42 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { true })?; - // TODO: Track and display download progress (see `Downloads` in `core/pacakge.rs`). + // If the progress bar isn't enabled then it may be awhile before the + // first index file finishes downloading so we inform immediately that + // we're prefetching here. + if self.downloads.downloads_finished == 0 + && self.downloads.pending.is_empty() + && !self + .downloads + .progress + .borrow() + .as_ref() + .unwrap() + .is_enabled() + { + self.downloads + .config + .shell() + .status("Prefetching", "index files ...")?; + } + + let dl = Download { + token, + data: RefCell::new(Vec::new()), + path: path.to_path_buf(), + name, + reqs, + etag: RefCell::new(None), + last_modified: RefCell::new(None), + total: Cell::new(0), + current: Cell::new(0), + }; // Finally add the request we've lined up to the pool of requests that cURL manages. let mut handle = self.prefetch.add(handle)?; handle.set_token(token)?; self.downloads.pending.insert(dl.token, (dl, handle)); + self.downloads.tick(WhyTick::DownloadStarted)?; Ok(()) } @@ -571,7 +680,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Walk all the requests that completed and handle their responses. 
// // This will ultimately add more replies to self.downloads.eager, which we'll - while let Some((token, result)) = results.pop() { + while let Some((token, result)) = self.downloads.results.pop() { trace!("{} finished with {:?}", token, result); let (dl, handle) = self @@ -600,6 +709,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { fetched.name, handle.response_code()? ); + + // This gets really noisy very quickly: + // self.config.shell().status("Prefetched", &fetched.name)?; + + self.downloads.downloads_finished += 1; + self.downloads.downloaded_bytes += dl.total.get(); + self.downloads.tick(WhyTick::DownloadFinished)?; + match code { 200 => { // We got data back, hooray! @@ -826,8 +943,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { true })?; - // TODO: Should we display transfer status here somehow? - transfer .perform() .chain_err(|| format!("failed to fetch index file `{}`", path.display()))?; @@ -1013,8 +1128,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { true })?; - // TODO: Should we show progress here somehow? - transfer .perform() .chain_err(|| format!("failed to fetch index changelog from `{}`", url))?; @@ -1477,51 +1590,77 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } -// NOTE: what follows is lifted from src/cargo/core/package.rs and tweaked - -/// Helper for downloading crates. -#[derive(Default)] -pub struct Downloads { - /// When a download is started, it is added to this map. The key is a - /// "token" (see `Download::token`). It is removed once the download is - /// finished. - pending: HashMap, - /// Set of paths currently being downloaded, mapped to their tokens. - /// This should stay in sync with `pending`. - pending_ids: HashMap, - /// The final result of each download. A pair `(token, result)`. This is a - /// temporary holding area, needed because curl can report multiple - /// downloads at once, but the main loop (`wait`) is written to only - /// handle one at a time. 
- results: Vec<(usize, Result<(), curl::Error>)>, - /// Prefetch requests that we already have a response to. - /// NOTE: Should this maybe be some kind of heap? - eager: BTreeMap, - /// The next ID to use for creating a token (see `Download::token`). - next: usize, -} - -struct Download { - /// The token for this download, used as the key of the `Downloads::pending` map - /// and stored in `EasyHandle` as well. - token: usize, +impl<'cfg> Downloads<'cfg> { + fn progress(&self, token: usize, total: u64, cur: u64) -> bool { + let dl = &self.pending[&token].0; + dl.total.set(total); + dl.current.set(cur); + if self.tick(WhyTick::DownloadUpdate).is_err() { + return false; + } - /// The path of the package that we're downloading. - path: PathBuf, + true + } - /// The name of the package that we're downloading. - name: InternedString, + fn tick(&self, why: WhyTick) -> CargoResult<()> { + let mut progress = self.progress.borrow_mut(); + let progress = progress.as_mut().unwrap(); - /// The version requirements for the dependency line that triggered this fetch. - // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary - reqs: HashSet, + if let WhyTick::DownloadUpdate = why { + if !progress.update_allowed() { + return Ok(()); + } + } + let pending = self.pending.len(); + let msg = if pending == 1 { + format!("{} index file", pending) + } else { + format!("{} index files", pending) + }; + progress.print_now(&msg) + } +} - /// Actual downloaded data, updated throughout the lifetime of this download. - data: RefCell>, +#[derive(Copy, Clone)] +enum WhyTick { + DownloadStarted, + DownloadUpdate, + DownloadFinished, +} - /// ETag and Last-Modified headers received from the server (if any). 
- etag: RefCell>, - last_modified: RefCell>, +impl<'cfg> Drop for Downloads<'cfg> { + fn drop(&mut self) { + let progress = self.progress.get_mut().take().unwrap(); + // Don't print a download summary if we're not using a progress bar, + // we've already printed lots of `Prefetching...` items. + if !progress.is_enabled() { + return; + } + // If we didn't download anything, no need for a summary. + if self.downloads_finished == 0 { + return; + } + // If an error happened, let's not clutter up the output. + if !self.success { + return; + } + // pick the correct plural of crate(s) + let index_files = if self.downloads_finished == 1 { + "index file" + } else { + "index files" + }; + let status = format!( + "{} {} ({}) in {}", + self.downloads_finished, + index_files, + ByteSize(self.downloaded_bytes), + util::elapsed(self.start.elapsed()) + ); + // Clear progress before displaying final summary. + drop(progress); + drop(self.config.shell().status("Prefetched", status)); + } } mod tls { @@ -1531,16 +1670,16 @@ mod tls { thread_local!(static PTR: Cell = Cell::new(0)); - pub(crate) fn with(f: impl FnOnce(Option<&Downloads>) -> R) -> R { + pub(crate) fn with(f: impl FnOnce(Option<&Downloads<'_>>) -> R) -> R { let ptr = PTR.with(|p| p.get()); if ptr == 0 { f(None) } else { - unsafe { f(Some(&*(ptr as *const Downloads))) } + unsafe { f(Some(&*(ptr as *const Downloads<'_>))) } } } - pub(crate) fn set(dl: &Downloads, f: impl FnOnce() -> R) -> R { + pub(crate) fn set(dl: &Downloads<'_>, f: impl FnOnce() -> R) -> R { struct Reset<'a, T: Copy>(&'a Cell, T); impl<'a, T: Copy> Drop for Reset<'a, T> { @@ -1551,7 +1690,7 @@ mod tls { PTR.with(|p| { let _reset = Reset(p, p.get()); - p.set(dl as *const Downloads as usize); + p.set(dl as *const Downloads<'_> as usize); f() }) } From cd7a55315c861775175a512c265d294b6ab7386a Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 15:08:01 -0800 Subject: [PATCH 39/83] Use Cargo.lock to seed prefetching This allows us to start 
downloads of a tonne of index files we otherwise would not discover until much later, which saves us many RTTs. On a dependency graph like that of cargo itself, it cut my download time to 1/5th. --- src/cargo/sources/registry/http_remote.rs | 31 ++++++++++++++++++++--- src/cargo/sources/registry/index.rs | 23 +++++++++++++++-- src/cargo/sources/registry/mod.rs | 9 +++++-- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index cb2f288d0b5..b32e2c60635 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -388,7 +388,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { root: &Path, path: &Path, name: InternedString, - req: &semver::VersionReq, + req: Option<&semver::VersionReq>, ) -> CargoResult<()> { // A quick overview of what goes on below: // @@ -415,6 +415,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { || !self.requested_update || self.prefetched.contains(path) { + let req = if let Some(req) = req { + req + } else { + // We don't need to fetch this file, and the caller does not care about it, + // so we can just return. 
+ return Ok(()); + }; + trace!("not prefetching fresh {}", name); // We already have this file locally, and we don't need to double-check it with @@ -477,10 +485,17 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .pending .get_mut(token) .expect("invalid token"); - dl.reqs.insert(req.clone()); + + if let Some(req) = req { + dl.reqs.insert(req.clone()); + } + return Ok(()); } else if let Some(f) = self.downloads.eager.get_mut(path) { - f.reqs.insert(req.clone()); + if let Some(req) = req { + f.reqs.insert(req.clone()); + } + return Ok(()); } @@ -534,7 +549,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { "path queued for download more than once" ); let mut reqs = HashSet::new(); - reqs.insert(req.clone()); + if let Some(req) = req { + reqs.insert(req.clone()); + } // Each write should go to self.downloads.pending[&token].data. // Since the write function must be 'static, we access downloads through a thread-local. @@ -646,6 +663,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // TODO: Use the nightly BTreeMap::pop_first when stable. if let Some(path) = self.downloads.eager.keys().next().cloned() { let fetched = self.downloads.eager.remove(&path).unwrap(); + + if fetched.reqs.is_empty() { + // This index file was proactively fetched even though it did not appear as a + // dependency, so we should not yield it back for future exploration. 
+ continue; + } return Ok(Some(fetched)); } diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index f47d1583bfe..94d93f6f116 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -463,6 +463,7 @@ impl<'cfg> RegistryIndex<'cfg> { pub fn prefetch( &mut self, deps: &mut dyn ExactSizeIterator>, + yanked_whitelist: &HashSet, load: &mut dyn RegistryData, ) -> CargoResult<()> { // For some registry backends, it's expensive to fetch each individual index file, and the @@ -502,6 +503,24 @@ impl<'cfg> RegistryIndex<'cfg> { // already looked at and just don't walk them again. let mut walked = HashSet::new(); + // Seed the prefetching with everything from the lockfile. + // + // This allows us to start downloads of a tonne of index files we otherwise would not + // discover until much later, which saves us many RTTs. On a dependency graph like that of + // cargo itself, it cut my download time to 1/5th. + // + // Note that the greedy fetch below actually ends up fetching additional dependencies even + // if nothing has change in the dependency graph. This is because the lockfile contains + // only the dependencies we actually _used_ last time. Thus, any dependencies that the + // greedy algorithm (erroneously) thinks we need will still need to be queued for download. + for pkg in yanked_whitelist { + if pkg.source_id() == self.source_id { + let name = pkg.name(); + let relative = relative(&*name); + load.prefetch(root, &Path::new(&relative), name, None)?; + } + } + // Seed the prefetching with the root dependencies. 
for dep in deps { walked.insert((dep.package_name(), dep.version_req().clone())); @@ -514,7 +533,7 @@ impl<'cfg> RegistryIndex<'cfg> { root, &Path::new(&relative), dep.package_name(), - dep.version_req(), + Some(dep.version_req()), )?; } @@ -590,7 +609,7 @@ impl<'cfg> RegistryIndex<'cfg> { root, Path::new(&relative), dep.package_name(), - dep.version_req(), + Some(dep.version_req()), )?; } } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 2edaf295ad2..c3a987f4009 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -413,12 +413,16 @@ pub trait RegistryData { /// /// The package path, name, and dependency versions requirements are passed back from /// `next_prefetched` so that they can be used to inform future calls to `prefetch`. + /// + /// If `req` is `None`, the index file will be downloaded, but will not be yielded by + /// `next_prefetched`. This is useful if you already have transitive closure of index entries + /// you wish to fetch. fn prefetch( &mut self, _root: &Path, _path: &Path, _name: InternedString, - _req: &semver::VersionReq, + _req: Option<&semver::VersionReq>, ) -> CargoResult<()> { Ok(()) } @@ -636,7 +640,8 @@ impl<'cfg> Source for RegistrySource<'cfg> { deps: &mut dyn ExactSizeIterator>, ) -> CargoResult<()> { // TODO: conditional index update? 
- self.index.prefetch(deps, &mut *self.ops)?; + self.index + .prefetch(deps, &self.yanked_whitelist, &mut *self.ops)?; Ok(()) } From f97db1942cb8287fdd1ee3ab01ddb062588f7307 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 15:43:45 -0800 Subject: [PATCH 40/83] Remove old note about pipelining --- src/cargo/sources/registry/http_remote.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index b32e2c60635..dd1f0d1e3fc 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -368,8 +368,6 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Also note that pipelining is disabled as curl authors have indicated // that it's buggy, and we've empirically seen that it's buggy with HTTP // proxies. - // - // TODO: Is that still the case? We probably want pipelining here if possible. self.multiplexing = self.config.http_config()?.multiplexing.unwrap_or(true); self.prefetch From 9a11d490186c0a64314a884b57f82b602b3bef02 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 17:33:57 -0800 Subject: [PATCH 41/83] Only prefetch once per invocation --- src/cargo/core/registry.rs | 15 ++++----------- src/cargo/core/resolver/dep_cache.rs | 11 ----------- src/cargo/core/resolver/mod.rs | 13 ++++++++++++- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index 01f3b8b050f..61b63daedb9 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -17,10 +17,7 @@ use url::Url; /// See also `core::Source`. pub trait Registry { /// Give source the opportunity to batch pre-fetch dependency information. - fn prefetch( - &mut self, - deps: &mut dyn ExactSizeIterator>, - ) -> CargoResult<()>; + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; /// Attempt to find the packages that match a dependency request. 
fn query( @@ -489,16 +486,12 @@ https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html } impl<'cfg> Registry for PackageRegistry<'cfg> { - fn prefetch( - &mut self, - deps: &mut dyn ExactSizeIterator>, - ) -> CargoResult<()> { + fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()> { assert!(self.patches_locked); - let ndeps = deps.len(); // We need to partition deps so that we can prefetch dependencies from different // sources. Note that we do not prefetch from overrides. - let mut deps_per_source = HashMap::with_capacity(ndeps); + let mut deps_per_source = HashMap::new(); for dep in deps { // We need to check for patches, as they may tell us to look at a different source. // If they do, we want to make sure we don't access the original registry @@ -523,7 +516,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> { deps_per_source .entry(source_id) - .or_insert_with(|| Vec::with_capacity(ndeps)) + .or_insert_with(Vec::new) .push(dep); } diff --git a/src/cargo/core/resolver/dep_cache.rs b/src/cargo/core/resolver/dep_cache.rs index 1f292289be3..1f6c49ca0fb 100644 --- a/src/cargo/core/resolver/dep_cache.rs +++ b/src/cargo/core/resolver/dep_cache.rs @@ -19,7 +19,6 @@ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::Config; use log::debug; -use std::borrow::Cow; use std::cmp::Ordering; use std::collections::{BTreeSet, HashMap, HashSet}; use std::rc::Rc; @@ -264,16 +263,6 @@ impl<'a> RegistryQueryer<'a> { // for our own dependencies. let (used_features, deps) = resolve_features(parent, candidate, opts)?; - // Then, allow the source to batch pre-fetch dependencies we may need. 
- self.registry - .prefetch(&mut deps.iter().map(|(d, _)| Cow::Borrowed(d))) - .chain_err(|| { - anyhow::format_err!( - "failed to prefetch dependencies of {}", - describe_path(&cx.parents.path_to_bottom(&candidate.package_id())), - ) - })?; - // Next, transform all dependencies into a list of possible candidates // which can satisfy that dependency. let mut deps = deps diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 094c64065b1..e8b716d0746 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -47,6 +47,7 @@ //! that we're implementing something that probably shouldn't be allocating all //! over the place. +use std::borrow::Cow; use std::collections::{BTreeMap, HashMap, HashSet}; use std::mem; use std::rc::Rc; @@ -57,7 +58,7 @@ use log::{debug, trace}; use crate::core::PackageIdSpec; use crate::core::{Dependency, PackageId, Registry, Summary}; use crate::util::config::Config; -use crate::util::errors::CargoResult; +use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::profile; use self::context::Context; @@ -133,6 +134,16 @@ pub fn resolve( Some(config) => config.cli_unstable().minimal_versions, None => false, }; + + // First, allow the source to batch pre-fetch dependencies we may need. 
+ registry + .prefetch( + &mut summaries + .iter() + .flat_map(|summary| summary.0.dependencies().iter().map(Cow::Borrowed)), + ) + .chain_err(|| "failed to prefetch dependencies")?; + let mut registry = RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config); let cx = activate_deps_loop(cx, &mut registry, summaries, config)?; From 7535830c0a587eba0192bf71f8a29c3bd8741daf Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 17:36:11 -0800 Subject: [PATCH 42/83] Fix tests to pass -Z flag and use 'nightly' --- crates/cargo-test-support/src/lib.rs | 1 + tests/testsuite/http_registry.rs | 95 +++++++++++++++++----------- 2 files changed, 58 insertions(+), 38 deletions(-) diff --git a/crates/cargo-test-support/src/lib.rs b/crates/cargo-test-support/src/lib.rs index bc02bf6b9db..3e58d7f770c 100644 --- a/crates/cargo-test-support/src/lib.rs +++ b/crates/cargo-test-support/src/lib.rs @@ -1525,6 +1525,7 @@ fn substitute_macros(input: &str) -> String { ("[DOCUMENTING]", " Documenting"), ("[FRESH]", " Fresh"), ("[UPDATING]", " Updating"), + ("[PREFETCHING]", " Prefetching"), ("[ADDING]", " Adding"), ("[REMOVING]", " Removing"), ("[DOCTEST]", " Doc-tests"), diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 8c3684aade6..1d6467f3ebe 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -14,6 +14,13 @@ use cargo_test_support::{basic_manifest, project}; use std::fs; use std::path::Path; +fn cargo(p: &cargo_test_support::Project, s: &str) -> cargo_test_support::Execs { + let mut e = p.cargo(s); + e.arg("-Zhttp-registry") + .env("__CARGO_TEST_CHANNEL_OVERRIDE_DO_NOT_USE_THIS", "nightly"); + e +} + fn setup(config: RegistryServerConfiguration) -> RegistryServer { let server = serve_registry(registry_path(), config); @@ -77,10 +84,11 @@ fn simple(config: RegistryServerConfiguration) { Package::new("bar", "0.0.1").publish(); - p.cargo("build") + cargo(&p, "build") 
.with_stderr(&format!( "\ [UPDATING] `{reg}` index +[PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] bar v0.0.1 (http registry `{reg}`) [COMPILING] bar v0.0.1 @@ -91,12 +99,13 @@ fn simple(config: RegistryServerConfiguration) { )) .run(); - p.cargo("clean").run(); + cargo(&p, "clean").run(); // Don't download a second time - p.cargo("build") + cargo(&p, "build") .with_stderr( "\ +[PREFETCHING] index files ... [COMPILING] bar v0.0.1 [COMPILING] foo v0.0.1 ([CWD]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s @@ -128,10 +137,11 @@ fn deps(config: RegistryServerConfiguration) { Package::new("baz", "0.0.1").publish(); Package::new("bar", "0.0.1").dep("baz", "*").publish(); - p.cargo("build") + cargo(&p, "build") .with_stderr(&format!( "\ [UPDATING] `{reg}` index +[PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] [..] v0.0.1 (http registry `{reg}`) [DOWNLOADED] [..] v0.0.1 (http registry `{reg}`) @@ -166,11 +176,12 @@ fn nonexistent(config: RegistryServerConfiguration) { .file("src/main.rs", "fn main() {}") .build(); - p.cargo("build") + cargo(&p, "build") .with_status(101) .with_stderr( "\ [UPDATING] [..] index +[PREFETCHING] index files ... error: no matching package named `nonexistent` found location searched: registry [..] required by package `foo v0.0.1 ([..])` @@ -201,7 +212,7 @@ fn update_registry(config: RegistryServerConfiguration) { .file("src/main.rs", "fn main() {}") .build(); - p.cargo("build") + cargo(&p, "build") .with_status(101) .with_stderr_contains( "\ @@ -214,10 +225,11 @@ required by package `foo v0.0.1 ([..])` Package::new("notyet", "0.0.1").publish(); - p.cargo("build") + cargo(&p, "build") .with_stderr(format!( "\ [UPDATING] `{reg}` index +[PREFETCHING] index files ... [DOWNLOADING] crates ... 
[DOWNLOADED] notyet v0.0.1 (http registry `{reg}`) [COMPILING] notyet v0.0.1 @@ -252,7 +264,7 @@ fn invalidate_index_on_rollover(config: RegistryServerConfiguration) { .file("src/main.rs", "fn main() {}") .build(); Package::new("a", "0.1.0").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); // Fish out the path to the .last-updated file let last_updated = if !matches!(config, RegistryServerConfiguration::NoChangelog) { @@ -309,10 +321,11 @@ fn invalidate_index_on_rollover(config: RegistryServerConfiguration) { // NOTE: we see UPDATING even when the changelog isn't used even though it is a no-op since // update_index is called whenever a version is not in the index cache. - p2.cargo("build") + cargo(&p2, "build") .with_stderr(format!( "\ [UPDATING] [..] +[PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] a v0.1.1 (http registry `{reg}`) [COMPILING] a v0.1.1 @@ -359,10 +372,11 @@ fn invalidate_index_on_rollover(config: RegistryServerConfiguration) { // NOTE: again, we see UPDATING even when the changelog isn't used even though it is a no-op // since update_index is called whenever a version is not in the index cache. - p3.cargo("build") + cargo(&p3, "build") .with_stderr(format!( "\ [UPDATING] [..] +[PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] a v0.1.2 (http registry `{reg}`) [COMPILING] a v0.1.2 @@ -402,7 +416,7 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { .file("src/main.rs", "fn main() {}") .build(); Package::new("a", "0.1.0").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); // Next, publish a new package and back up the copy of the registry we just // created. 
@@ -429,7 +443,7 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { ) .file("src/main.rs", "fn main() {}") .build(); - p2.cargo("build").run(); + cargo(&p2, "build").run(); registry.rm_rf(); t!(fs::rename(&backup, ®istry)); t!(fs::rename( @@ -440,10 +454,11 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { // Finally, build the first project again (with our newer Cargo.lock) which // should force an update of the old registry, download the new crate, and // then build everything again. - p.cargo("build") + cargo(&p, "build") .with_stderr(format!( "\ [UPDATING] [..] +[PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] a v0.1.1 (http registry `{reg}`) [COMPILING] a v0.1.1 @@ -481,32 +496,34 @@ fn update_multiple_packages(config: RegistryServerConfiguration) { Package::new("b", "0.1.0").publish(); Package::new("c", "0.1.0").publish(); - p.cargo("fetch").run(); + cargo(&p, "fetch").run(); Package::new("a", "0.1.1").publish(); Package::new("b", "0.1.1").publish(); Package::new("c", "0.1.1").publish(); - p.cargo("update -pa -pb") + cargo(&p, "update -pa -pb") .with_stderr( "\ [UPDATING] `[..]` index +[PREFETCHING] index files ... [UPDATING] a v0.1.0 -> v0.1.1 [UPDATING] b v0.1.0 -> v0.1.1 ", ) .run(); - p.cargo("update -pb -pc") + cargo(&p, "update -pb -pc") .with_stderr( "\ [UPDATING] `[..]` index +[PREFETCHING] index files ... 
[UPDATING] c v0.1.0 -> v0.1.1 ", ) .run(); - p.cargo("build") + cargo(&p, "build") .with_stderr_contains(format!("[DOWNLOADED] a v0.1.1 (http registry `{}`)", url)) .with_stderr_contains(format!("[DOWNLOADED] b v0.1.1 (http registry `{}`)", url)) .with_stderr_contains(format!("[DOWNLOADED] c v0.1.1 (http registry `{}`)", url)) @@ -557,7 +574,7 @@ fn bundled_crate_in_registry(config: RegistryServerConfiguration) { .file("bar/src/lib.rs", "") .publish(); - p.cargo("run").run(); + cargo(&p, "run").run(); } test_w_wo_changelog!(update_same_prefix_oh_my_how_was_this_a_bug); @@ -584,8 +601,8 @@ fn update_same_prefix_oh_my_how_was_this_a_bug(config: RegistryServerConfigurati .dep("foobar", "0.2.0") .publish(); - p.cargo("generate-lockfile").run(); - p.cargo("update -pfoobar --precise=0.2.0").run(); + cargo(&p, "generate-lockfile").run(); + cargo(&p, "update -pfoobar --precise=0.2.0").run(); } test_w_wo_changelog!(use_semver); @@ -609,7 +626,7 @@ fn use_semver(config: RegistryServerConfiguration) { Package::new("foo", "1.2.3-alpha.0").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); } test_w_wo_changelog!(use_semver_package_incorrectly); @@ -648,7 +665,7 @@ fn use_semver_package_incorrectly(config: RegistryServerConfiguration) { .file("b/src/main.rs", "fn main() {}") .build(); - p.cargo("build") + cargo(&p, "build") .with_status(101) .with_stderr( "\ @@ -689,10 +706,11 @@ fn only_download_relevant(config: RegistryServerConfiguration) { Package::new("bar", "0.1.0").publish(); Package::new("baz", "0.1.0").publish(); - p.cargo("build") + cargo(&p, "build") .with_stderr( "\ [UPDATING] `[..]` index +[PREFETCHING] index files ... [DOWNLOADING] crates ... 
[DOWNLOADED] baz v0.1.0 ([..]) [COMPILING] baz v0.1.0 @@ -727,7 +745,7 @@ fn resolve_and_backtracking(config: RegistryServerConfiguration) { .publish(); Package::new("foo", "0.1.0").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); } test_w_wo_changelog!(disallow_network); @@ -750,11 +768,11 @@ fn disallow_network(config: RegistryServerConfiguration) { .build(); // TODO: this should also check that we don't access the network for things we have in cache. - p.cargo("build --frozen") + cargo(&p, "build --frozen") .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `bar v0.5.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `foo` @@ -806,7 +824,7 @@ fn add_dep_dont_update_registry(config: RegistryServerConfiguration) { Package::new("remote", "0.3.4").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); p.change_file( "Cargo.toml", @@ -822,9 +840,10 @@ fn add_dep_dont_update_registry(config: RegistryServerConfiguration) { "#, ); - p.cargo("build") + cargo(&p, "build") .with_stderr( "\ +[PREFETCHING] index files ... [COMPILING] bar v0.5.0 ([..]) [FINISHED] [..] 
", @@ -866,7 +885,7 @@ fn bump_version_dont_update_registry(config: RegistryServerConfiguration) { Package::new("remote", "0.3.4").publish(); - p.cargo("build").run(); + cargo(&p, "build").run(); p.change_file( "Cargo.toml", @@ -881,7 +900,7 @@ fn bump_version_dont_update_registry(config: RegistryServerConfiguration) { "#, ); - p.cargo("build") + cargo(&p, "build") .with_stderr( "\ [COMPILING] bar v0.6.0 ([..]) @@ -928,7 +947,7 @@ fn toml_lies_but_index_is_truth(config: RegistryServerConfiguration) { .file("src/main.rs", "fn main() {}") .build(); - p.cargo("build -v").run(); + cargo(&p, "build -v").run(); } test_w_wo_changelog!(rename_deps_and_features); @@ -986,9 +1005,9 @@ fn rename_deps_and_features(config: RegistryServerConfiguration) { ) .build(); - p.cargo("build").run(); - p.cargo("build --features bar/foo01").run(); - p.cargo("build --features bar/another").run(); + cargo(&p, "build").run(); + cargo(&p, "build --features bar/foo01").run(); + cargo(&p, "build --features bar/another").run(); } test_w_wo_changelog!(ignore_invalid_json_lines); @@ -1015,7 +1034,7 @@ fn ignore_invalid_json_lines(config: RegistryServerConfiguration) { .file("src/lib.rs", "") .build(); - p.cargo("build").run(); + cargo(&p, "build").run(); } test_w_wo_changelog!(readonly_registry_still_works); @@ -1039,10 +1058,10 @@ fn readonly_registry_still_works(config: RegistryServerConfiguration) { .file("src/lib.rs", "") .build(); - p.cargo("generate-lockfile").run(); - p.cargo("fetch --locked").run(); + cargo(&p, "generate-lockfile").run(); + cargo(&p, "fetch --locked").run(); chmod_readonly(&paths::home(), true); - p.cargo("build").run(); + cargo(&p, "build").run(); // make sure we un-readonly the files afterwards so "cargo clean" can remove them (#6934) chmod_readonly(&paths::home(), false); From a3854136c9db0f7f4f631d56f3524b9755cca029 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 18:17:06 -0800 Subject: [PATCH 43/83] Fix more tests from hoisting prefetching phase --- 
tests/testsuite/bad_config.rs | 8 ++++---- tests/testsuite/cargo_features.rs | 2 +- tests/testsuite/directory.rs | 2 +- tests/testsuite/git.rs | 4 ++-- tests/testsuite/git_auth.rs | 6 +++--- tests/testsuite/local_registry.rs | 2 +- tests/testsuite/offline.rs | 2 +- tests/testsuite/path.rs | 4 ++-- tests/testsuite/registry.rs | 2 +- tests/testsuite/workspaces.rs | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/testsuite/bad_config.rs b/tests/testsuite/bad_config.rs index bf927f6cade..7387dbe7f81 100644 --- a/tests/testsuite/bad_config.rs +++ b/tests/testsuite/bad_config.rs @@ -368,7 +368,7 @@ fn bad_git_dependency() { .with_stderr( "\ [UPDATING] git repository `file:///` -[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `foo` @@ -934,7 +934,7 @@ fn bad_source_config2() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` @@ -980,7 +980,7 @@ fn bad_source_config3() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` @@ -1028,7 +1028,7 @@ fn bad_source_config4() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.0.0 ([..])` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/cargo_features.rs b/tests/testsuite/cargo_features.rs index dda7a5f9a54..57ea6ca14b8 100644 --- a/tests/testsuite/cargo_features.rs +++ b/tests/testsuite/cargo_features.rs @@ -199,7 +199,7 @@ fn nightly_feature_requires_nightly_in_dep() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `b v0.0.1 ([..])` +[ERROR] 
failed to prefetch dependencies Caused by: failed to load source for dependency `a` diff --git a/tests/testsuite/directory.rs b/tests/testsuite/directory.rs index 0b4d7032f92..e4dcdec2258 100644 --- a/tests/testsuite/directory.rs +++ b/tests/testsuite/directory.rs @@ -653,7 +653,7 @@ fn git_override_requires_lockfile() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.0.1 ([..])` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `git` diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index fce51d32ca1..2a616091cd3 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -938,7 +938,7 @@ fn dep_with_bad_submodule() { "\ [UPDATING] git repository [..] [UPDATING] git submodule `file://[..]/dep2` -[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `dep1` @@ -2362,7 +2362,7 @@ fn invalid_git_dependency_manifest() { .with_stderr(&format!( "\ [UPDATING] git repository `{}` -[ERROR] failed to prefetch dependencies of package `foo v0.5.0 ([..])` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `dep1` diff --git a/tests/testsuite/git_auth.rs b/tests/testsuite/git_auth.rs index 4a8c60a8988..e2d69cd5ac6 100644 --- a/tests/testsuite/git_auth.rs +++ b/tests/testsuite/git_auth.rs @@ -137,7 +137,7 @@ fn http_auth_offered() { .with_stderr_contains(&format!( "\ [UPDATING] git repository `http://{addr}/foo/bar` -[ERROR] failed to prefetch dependencies of package `foo v0.0.1 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` @@ -299,7 +299,7 @@ fn net_err_suggests_fetch_with_cli() { [UPDATING] git repository `ssh://needs-proxy.invalid/git` warning: spurious network error[..] warning: spurious network error[..] 
-[ERROR] failed to prefetch dependencies of package `foo v0.0.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `foo` @@ -368,7 +368,7 @@ fn instead_of_url_printed() { .with_stderr(&format!( "\ [UPDATING] git repository `https://foo.bar/foo/bar` -[ERROR] failed to prefetch dependencies of package `foo [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/local_registry.rs b/tests/testsuite/local_registry.rs index 522852b0189..d1de7c9e646 100644 --- a/tests/testsuite/local_registry.rs +++ b/tests/testsuite/local_registry.rs @@ -359,7 +359,7 @@ fn invalid_dir_bad() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.0.1 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/offline.rs b/tests/testsuite/offline.rs index bd5bbe25e83..afbcbfef822 100644 --- a/tests/testsuite/offline.rs +++ b/tests/testsuite/offline.rs @@ -270,7 +270,7 @@ fn cargo_compile_forbird_git_httpsrepo_offline() { .build(); p.cargo("build --offline").with_status(101).with_stderr("\ -[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `dep1` diff --git a/tests/testsuite/path.rs b/tests/testsuite/path.rs index 85b1c33c33d..e09cae89af5 100644 --- a/tests/testsuite/path.rs +++ b/tests/testsuite/path.rs @@ -511,7 +511,7 @@ fn error_message_for_missing_manifest() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.5.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `bar` @@ -1044,7 +1044,7 @@ fn deep_path_error() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `b v0.1.0 [..]` +[ERROR] failed to get `c` as a dependency of package 
`b v0.1.0 [..]` ... which is depended on by `a v0.1.0 [..]` ... which is depended on by `foo v0.1.0 [..]` diff --git a/tests/testsuite/registry.rs b/tests/testsuite/registry.rs index 3fb4e76f831..9643f88e0ef 100644 --- a/tests/testsuite/registry.rs +++ b/tests/testsuite/registry.rs @@ -1562,7 +1562,7 @@ fn disallow_network() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `bar v0.5.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `foo` diff --git a/tests/testsuite/workspaces.rs b/tests/testsuite/workspaces.rs index 355a8ee5306..9b0a5b3b20b 100644 --- a/tests/testsuite/workspaces.rs +++ b/tests/testsuite/workspaces.rs @@ -2302,7 +2302,7 @@ fn invalid_missing() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies of package `foo v0.1.0 [..]` +[ERROR] failed to prefetch dependencies Caused by: failed to load source for dependency `x` From de13064f72c0c396e1cc4986875ef818c2919c42 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 30 Nov 2020 19:02:56 -0800 Subject: [PATCH 44/83] Use the right API def for resolver-tests --- crates/resolver-tests/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/resolver-tests/src/lib.rs b/crates/resolver-tests/src/lib.rs index d2316035b0d..239507fdc32 100644 --- a/crates/resolver-tests/src/lib.rs +++ b/crates/resolver-tests/src/lib.rs @@ -128,7 +128,7 @@ pub fn resolve_with_config_raw( impl<'a> Registry for MyRegistry<'a> { fn prefetch( &mut self, - _deps: &mut dyn ExactSizeIterator>, + _deps: &mut dyn Iterator>, ) -> CargoResult<()> { // Doing nothing is a valid way to prefetch. 
Ok(()) From 6c2e28ff81ba51f2beeca953e59180a183dce080 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:37:07 -0800 Subject: [PATCH 45/83] Move to sparse+http and add index= support --- src/cargo/core/source/source_id.rs | 4 ++-- src/cargo/ops/registry.rs | 9 +++++++-- src/cargo/sources/config.rs | 6 +++--- tests/testsuite/http_registry.rs | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index fb431034b42..bd39000c9d6 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -139,7 +139,7 @@ impl SourceId { Ok(SourceId::new(SourceKind::Registry, url)? .with_precise(Some("locked".to_string()))) } - "rfc" => { + "sparse" => { let url = url.into_url()?; Ok(SourceId::new(SourceKind::Http, url)?.with_precise(Some("locked".to_string()))) } @@ -600,7 +600,7 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> { kind: SourceKind::Http, ref url, .. - } => write!(f, "rfc+{}", url), + } => write!(f, "sparse+{}", url), SourceIdInner { kind: SourceKind::Registry, ref url, diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs index a234d4cfc41..0087890d09c 100644 --- a/src/cargo/ops/registry.rs +++ b/src/cargo/ops/registry.rs @@ -838,8 +838,13 @@ fn get_source_id( ) -> CargoResult { match (reg, index) { (Some(r), _) => SourceId::alt_registry(config, r), - // TODO: this should go through from_url - (_, Some(i)) => SourceId::for_registry(&i.into_url()?), + (_, Some(i)) => { + if let Some(i) = i.strip_prefix("sparse+") { + SourceId::for_http_registry(&i.into_url()?) + } else { + SourceId::for_registry(&i.into_url()?) 
+ } + } _ => { let map = SourceConfigMap::new(config)?; let src = map.load(SourceId::crates_io(config)?, &HashSet::new())?; diff --git a/src/cargo/sources/config.rs b/src/cargo/sources/config.rs index 1abb9f44d9d..bc313122a47 100644 --- a/src/cargo/sources/config.rs +++ b/src/cargo/sources/config.rs @@ -208,15 +208,15 @@ restore the source replacement configuration to continue the build if let Some(registry) = def.registry { let url = url(®istry, &format!("source.{}.registry", name))?; - if url.scheme().starts_with("rfc+") { + if url.scheme().starts_with("sparse+") { if !self.config.cli_unstable().http_registry { bail!("Usage of HTTP-based registries requires `-Z http-registry`") } - // NOTE: it is illegal to use set_scheme to change rfc+http(s) to http(s). + // NOTE: it is illegal to use set_scheme to change sparse+http(s) to http(s). let url = url .to_string() - .strip_prefix("rfc+") + .strip_prefix("sparse+") .unwrap() .into_url() .unwrap(); diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 1d6467f3ebe..4e2de0725e9 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -35,7 +35,7 @@ fn setup(config: RegistryServerConfiguration) -> RegistryServer { replace-with = 'my-awesome-http-registry' [source.my-awesome-http-registry] - registry = 'rfc+http://{}' + registry = 'sparse+http://{}' ", server.addr() ) From 15aa11ffd3e1152e0142bcc0be0902df353138d6 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:38:02 -0800 Subject: [PATCH 46/83] Move RegistryData::load to &mut self This will be needed for the new sparse registry to allow it to keep track of files it has already checked with the server. 
--- src/cargo/sources/registry/http_remote.rs | 2 +- src/cargo/sources/registry/index.rs | 2 +- src/cargo/sources/registry/local.rs | 2 +- src/cargo/sources/registry/mod.rs | 2 +- src/cargo/sources/registry/remote.rs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index dd1f0d1e3fc..f9cb21216b5 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -837,7 +837,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn load( - &self, + &mut self, root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 94d93f6f116..146723b4e46 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -647,7 +647,7 @@ impl Summaries { cache_root: &Path, relative: &Path, source_id: SourceId, - load: &dyn RegistryData, + load: &mut dyn RegistryData, config: &Config, ) -> CargoResult> { // First up, attempt to load the cache. 
This could fail for all manner diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs index d35345eb86c..73730973dc3 100644 --- a/src/cargo/sources/registry/local.rs +++ b/src/cargo/sources/registry/local.rs @@ -47,7 +47,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { } fn load( - &self, + &mut self, root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index c3a987f4009..e9255047c4a 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -433,7 +433,7 @@ pub trait RegistryData { } fn load( - &self, + &mut self, root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index e52d38b3756..f51546d8a34 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -159,7 +159,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { } fn load( - &self, + &mut self, _root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, From bad9d3ca5c09c7d11f7ac8232b33c72003e3af92 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:39:22 -0800 Subject: [PATCH 47/83] Look for LAST_UPDATED in the right place --- src/cargo/sources/registry/http_remote.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index f9cb21216b5..aa0767200b2 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -307,6 +307,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if !self.checked_for_at.get() { self.checked_for_at.set(true); let path = self.config.assert_package_cache_locked(&self.index_path); + let path = path.join(LAST_UPDATED_FILE); if path.exists() { let cl_state = paths::read(&path.join(LAST_UPDATED_FILE))?; let 
cl_state: ChangelogState = cl_state From ee6abeb7667bc05f892874826f811c7e5574dfd1 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:40:05 -0800 Subject: [PATCH 48/83] Correct handling of 404s during prefetch --- src/cargo/sources/registry/http_remote.rs | 48 +++++++++++++++++------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index aa0767200b2..31e3ea784f9 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -490,12 +490,18 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } return Ok(()); - } else if let Some(f) = self.downloads.eager.get_mut(path) { - if let Some(req) = req { - f.reqs.insert(req.clone()); - } - + } else if self.prefetched.contains(path) { + // This must have been a 404 when we initially prefetched it. return Ok(()); + } else if let Some(f) = self.downloads.eager.get_mut(path) { + // We can't hit this case. + // The index file must exist for the path to be in `eager`, + // but since that's the case, we should have caught this + // in the eager check _in_ the pkg.exists() path. + unreachable!( + "index file `{}` is in eager, but file doesn't exist", + f.path.display() + ); } if was.is_some() { @@ -722,7 +728,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { }; assert!( self.prefetched.insert(fetched.path.clone()), - "downloaded the same path twice during prefetching" + "downloaded the index file `{}` twice during prefetching", + fetched.path.display(), ); let code = handle.response_code()?; @@ -781,10 +788,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { 404 => { // Not Found response. // The crate doesn't exist, so we simply do not yield it. + // Errors will eventually be yielded by load(). } 410 | 451 => { // The crate was deleted from the registry. - todo!(); + // Errors will eventually be yielded by load(). 
+ todo!("we should delete the local index file here if it exists"); } code => { anyhow::bail!("server returned unexpected HTTP status code {}", code); @@ -869,8 +878,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let is_synchronized = self.at.get().0.is_synchronized(); let is_fresh = if is_synchronized { - if self.requested_update && path.ends_with("config.json") { - debug!("double-checking freshness of {} on update", path.display()); + if self.requested_update + && path.ends_with("config.json") + && !self.prefetched.contains(path) + { + debug!("double-checking freshness of config.json on update"); false } else { trace!( @@ -910,6 +922,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let last_modified = std::str::from_utf8(last_modified)?; Some((etag, last_modified, rest)) } + } else if self.prefetched.contains(path) { + // This must have been a 404. + anyhow::bail!("crate does not exist in the registry"); } else { assert!(!self.is_prefetching); None @@ -975,12 +990,19 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { list.append("If-Modified-Since:")?; list.append("If-None-Match:")?; handle.http_headers(list)?; + let response_code = handle.response_code()?; + drop(handle); - debug!( - "index file downloaded with status code {}", - handle.response_code()? + debug!("index file downloaded with status code {}", response_code,); + + // Make sure we don't double-check the file again if it's loaded again. + assert!( + self.prefetched.insert(path.to_path_buf()), + "downloaded the index file `{}` twice", + path.display(), ); - match handle.response_code()? { + + match response_code { 200 => {} 304 => { // Not Modified response. 
From a9f80f8a18ffe18427f000ae9e4889313da8838f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:40:22 -0800 Subject: [PATCH 49/83] Handle offline errors earlier --- src/cargo/sources/registry/http_remote.rs | 6 ++++++ tests/testsuite/http_registry.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 31e3ea784f9..fa4730ad965 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1053,6 +1053,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if self.config.cli_unstable().no_index_update { return Ok(()); } + if self.config.frozen() { + anyhow::bail!("attempting to update a http repository, but --frozen was specified") + } + if !self.config.network_allowed() { + anyhow::bail!("can't update a http repository in offline mode") + } // Make sure the index is only updated once per session since it is an // expensive operation. This generally only happens when the resolver // is run multiple times, such as during `cargo publish`. diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 4e2de0725e9..551330b2860 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -784,7 +784,7 @@ Caused by: failed to update replaced source registry `https://github.com/rust-lang/crates.io-index` Caused by: - attempting to make an HTTP request, but --frozen was specified + attempting to update a http repository, but --frozen was specified ", ) .run(); From 38ad840c7c25a305bf3c51e6101defe11c837919 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 15:40:36 -0800 Subject: [PATCH 50/83] Actually show a progress bar It does jump a bit, but it's better than what was. 
--- src/cargo/sources/registry/http_remote.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index fa4730ad965..f262521b652 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1653,21 +1653,19 @@ impl<'cfg> Downloads<'cfg> { } fn tick(&self, why: WhyTick) -> CargoResult<()> { + if let WhyTick::DownloadUpdate = why { + // We don't show progress for individual downloads. + return Ok(()); + } + let mut progress = self.progress.borrow_mut(); let progress = progress.as_mut().unwrap(); - if let WhyTick::DownloadUpdate = why { - if !progress.update_allowed() { - return Ok(()); - } - } - let pending = self.pending.len(); - let msg = if pending == 1 { - format!("{} index file", pending) - } else { - format!("{} index files", pending) - }; - progress.print_now(&msg) + // NOTE: should we show something about self.eager? 
+ progress.tick( + self.downloads_finished, + self.downloads_finished + self.pending.len(), + ) } } From 508ffa6825c2ba008e8d802be4968de3fe232642 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 16:08:19 -0800 Subject: [PATCH 51/83] Mention -Zhttp-registries in help output --- src/bin/cargo/cli.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index e24f583f17f..e232c553916 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -43,7 +43,8 @@ Available unstable (nightly-only) flags: -Z doctest-xcompile -- Compile and run doctests for non-host target using runner config -Z terminal-width -- Provide a terminal width to rustc for error truncation -Z namespaced-features -- Allow features with `dep:` prefix - -Z weak-dep-features -- Allow `dep_name?/feature` feature syntax + -Z weak-dep-features -- Allow `dep_name?/feature` feature syntax + -Z http-registries -- Support HTTP-based crate registries Run with 'cargo -Z [FLAG] [SUBCOMMAND]'" ); From 517a3e2b427307c841ced56c1250d5d779f1126a Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 16:09:21 -0800 Subject: [PATCH 52/83] Fix botched manual diff --- src/cargo/sources/registry/http_remote.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index f262521b652..6bba9703d92 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -309,7 +309,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let path = self.config.assert_package_cache_locked(&self.index_path); let path = path.join(LAST_UPDATED_FILE); if path.exists() { - let cl_state = paths::read(&path.join(LAST_UPDATED_FILE))?; + let cl_state = paths::read(&path)?; let cl_state: ChangelogState = cl_state .parse() .map_err(|e| anyhow::anyhow!("{}", e)) From 666895b656ca255d91baae902d330b3b02d09da9 Mon Sep 17 00:00:00 
2001 From: Jon Gjengset Date: Tue, 1 Dec 2020 16:24:05 -0800 Subject: [PATCH 53/83] Better name for set of already-checked files --- src/cargo/sources/registry/http_remote.rs | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 6bba9703d92..bfdd8f51e39 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -153,10 +153,10 @@ pub struct HttpRegistry<'cfg> { /// Does the config say that we can use HTTP multiplexing? multiplexing: bool, - /// What paths have we already prefetched? + /// What paths have we already fetched since the last index update? /// - /// We do not need to double-check any of these index files -- the prefetch stage already did. - prefetched: HashSet, + /// We do not need to double-check any of these index files since we have already done so. + fresh: HashSet, /// If we are currently prefetching, all calls to RegistryData::load should go to disk. 
is_prefetching: bool, @@ -252,7 +252,7 @@ impl<'cfg> HttpRegistry<'cfg> { downloaded_bytes: 0, success: false, }, - prefetched: HashSet::new(), + fresh: HashSet::new(), requested_update: false, is_prefetching: false, } @@ -412,7 +412,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let was = if pkg.exists() { if self.at.get().0.is_synchronized() || !self.requested_update - || self.prefetched.contains(path) + || self.fresh.contains(path) { let req = if let Some(req) = req { req @@ -435,7 +435,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { o.get_mut().reqs.insert(req.clone()); } Entry::Vacant(v) => { - if self.prefetched.contains(path) { + if self.fresh.contains(path) { debug!("yielding already-prefetched {}", name); } let mut reqs = HashSet::new(); @@ -490,7 +490,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } return Ok(()); - } else if self.prefetched.contains(path) { + } else if self.fresh.contains(path) { // This must have been a 404 when we initially prefetched it. 
return Ok(()); } else if let Some(f) = self.downloads.eager.get_mut(path) { @@ -727,7 +727,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { reqs: dl.reqs, }; assert!( - self.prefetched.insert(fetched.path.clone()), + self.fresh.insert(fetched.path.clone()), "downloaded the index file `{}` twice during prefetching", fetched.path.display(), ); @@ -880,7 +880,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let is_fresh = if is_synchronized { if self.requested_update && path.ends_with("config.json") - && !self.prefetched.contains(path) + && !self.fresh.contains(path) { debug!("double-checking freshness of config.json on update"); false @@ -903,7 +903,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } else if self.is_prefetching { trace!("using local {} in load while prefetching", path.display()); true - } else if self.prefetched.contains(path) { + } else if self.fresh.contains(path) { trace!( "using local {} as it was already prefetched", path.display() @@ -922,7 +922,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { let last_modified = std::str::from_utf8(last_modified)?; Some((etag, last_modified, rest)) } - } else if self.prefetched.contains(path) { + } else if self.fresh.contains(path) { // This must have been a 404. anyhow::bail!("crate does not exist in the registry"); } else { @@ -997,7 +997,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Make sure we don't double-check the file again if it's loaded again. assert!( - self.prefetched.insert(path.to_path_buf()), + self.fresh.insert(path.to_path_buf()), "downloaded the index file `{}` twice", path.display(), ); @@ -1073,7 +1073,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Make sure that subsequent loads double-check with the server again. 
self.requested_update = true; - self.prefetched.clear(); + self.fresh.clear(); self.prepare()?; let path = self.config.assert_package_cache_locked(&self.index_path); From b41dfbb53bd69d3c4298ca7e851182799505563f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 11:15:43 -0800 Subject: [PATCH 54/83] Make sparse+ work in more places Thanks @Nemo157 --- src/cargo/core/dependency.rs | 4 ++-- src/cargo/core/source/source_id.rs | 7 ++++++- src/cargo/sources/registry/mod.rs | 6 +++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/cargo/core/dependency.rs b/src/cargo/core/dependency.rs index 61795936dc2..b43276a7fba 100644 --- a/src/cargo/core/dependency.rs +++ b/src/cargo/core/dependency.rs @@ -60,7 +60,7 @@ struct SerializedDependency<'a> { target: Option<&'a Platform>, /// The registry URL this dependency is from. /// If None, then it comes from the default registry (crates.io). - registry: Option<&'a str>, + registry: Option, } impl ser::Serialize for Dependency { @@ -79,7 +79,7 @@ impl ser::Serialize for Dependency { features: self.features(), target: self.platform(), rename: self.explicit_name_in_toml().map(|s| s.as_str()), - registry: registry_id.as_ref().map(|sid| sid.url().as_str()), + registry: registry_id.as_ref().map(|sid| sid.as_url().to_string()), } .serialize(s) } diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index bd39000c9d6..ef5f01d83a0 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -207,8 +207,13 @@ impl SourceId { pub fn alt_registry(config: &Config, key: &str) -> CargoResult { let url = config.get_registry_index(key)?; + let (kind, url) = if let Some(url) = url.to_string().strip_prefix("sparse+") { + (SourceKind::Http, url.into_url()?) 
+ } else { + (SourceKind::Registry, url) + }; Ok(SourceId::wrap(SourceIdInner { - kind: SourceKind::Registry, + kind, canonical_url: CanonicalUrl::new(&url)?, url, precise: None, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 8e5f731c854..c84c253d5c7 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -322,7 +322,11 @@ impl<'a> RegistryDependency<'a> { } = self; let id = if let Some(registry) = ®istry { - SourceId::for_registry(®istry.into_url()?)? + if let Some(registry) = registry.strip_prefix("sparse+") { + SourceId::for_http_registry(®istry.into_url()?)? + } else { + SourceId::for_registry(®istry.into_url()?)? + } } else { default }; From 3a71c4cf91e71786e260f3f122519b98188158b2 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 11:16:23 -0800 Subject: [PATCH 55/83] Keep iterating while there's work --- src/cargo/sources/registry/http_remote.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index bfdd8f51e39..c223e648912 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -660,7 +660,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn next_prefetched(&mut self) -> CargoResult> { - while !self.downloads.pending.is_empty() || self.downloads.eager.is_empty() { + while !self.downloads.pending.is_empty() || !self.downloads.eager.is_empty() { // We may already have packages that are ready to go. This takes care of grabbing the // next of those, while ensuring that we yield every distinct version requirement for // each package. 
From 165f01d9e5e94fa63c2ad68d7e2a16cc318fccd9 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 11:17:35 -0800 Subject: [PATCH 56/83] Don't fetch transitive dev-dependencies --- src/cargo/sources/registry/http_remote.rs | 11 +++++++++++ src/cargo/sources/registry/index.rs | 9 ++++++++- src/cargo/sources/registry/mod.rs | 2 ++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index c223e648912..db11f6a7855 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -211,6 +211,9 @@ struct Download { // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary reqs: HashSet, + /// True if this download is of a direct dependency of the root crate. + is_transitive: bool, + /// Actual downloaded data, updated throughout the lifetime of this download. data: RefCell>, @@ -388,6 +391,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: &Path, name: InternedString, req: Option<&semver::VersionReq>, + is_transitive: bool, ) -> CargoResult<()> { // A quick overview of what goes on below: // @@ -433,6 +437,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { match self.downloads.eager.entry(path.to_path_buf()) { Entry::Occupied(mut o) => { o.get_mut().reqs.insert(req.clone()); + // We trust a signal that something is _not_ transitive + // more than a signal that it is transitive. 
+ o.get_mut().is_transitive &= is_transitive; } Entry::Vacant(v) => { if self.fresh.contains(path) { @@ -444,6 +451,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: path.to_path_buf(), name, reqs, + is_transitive, }); } } @@ -488,6 +496,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if let Some(req) = req { dl.reqs.insert(req.clone()); } + dl.is_transitive &= is_transitive; return Ok(()); } else if self.fresh.contains(path) { @@ -644,6 +653,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: path.to_path_buf(), name, reqs, + is_transitive, etag: RefCell::new(None), last_modified: RefCell::new(None), total: Cell::new(0), @@ -725,6 +735,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { path: dl.path, name: dl.name, reqs: dl.reqs, + is_transitive: dl.is_transitive, }; assert!( self.fresh.insert(fetched.path.clone()), diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 146723b4e46..af44af56947 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -517,7 +517,7 @@ impl<'cfg> RegistryIndex<'cfg> { if pkg.source_id() == self.source_id { let name = pkg.name(); let relative = relative(&*name); - load.prefetch(root, &Path::new(&relative), name, None)?; + load.prefetch(root, &Path::new(&relative), name, None, true)?; } } @@ -534,6 +534,7 @@ impl<'cfg> RegistryIndex<'cfg> { &Path::new(&relative), dep.package_name(), Some(dep.version_req()), + false, )?; } @@ -599,6 +600,11 @@ impl<'cfg> RegistryIndex<'cfg> { continue; } + // Don't pull in dev-dependencies of transitive dependencies. + if fetched.is_transitive && !dep.is_transitive() { + continue; + } + if !walked.insert((dep.package_name(), dep.version_req().clone())) { // We've already walked this dependency -- no need to do so again. 
continue; @@ -610,6 +616,7 @@ impl<'cfg> RegistryIndex<'cfg> { Path::new(&relative), dep.package_name(), Some(dep.version_req()), + true, )?; } } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index c84c253d5c7..8830a04f1cd 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -381,6 +381,7 @@ pub struct Fetched { path: PathBuf, // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary reqs: HashSet, + is_transitive: bool, } impl Fetched { @@ -427,6 +428,7 @@ pub trait RegistryData { _path: &Path, _name: InternedString, _req: Option<&semver::VersionReq>, + _is_transitive: bool, ) -> CargoResult<()> { Ok(()) } From 1bcbc6396bceac9b90b6525661723f3f8bd702ef Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 11:20:04 -0800 Subject: [PATCH 57/83] Add early top for dependency version walk --- src/cargo/sources/registry/index.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index af44af56947..fd931f2da38 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -565,6 +565,7 @@ impl<'cfg> RegistryIndex<'cfg> { } }; + let mut matched = false; for (version, maybe_summary) in &mut summaries.versions { if !fetched.version_reqs().any(|vr| vr.matches(&version)) { // The crate that pulled in this crate as a dependency did not care about this @@ -578,9 +579,26 @@ impl<'cfg> RegistryIndex<'cfg> { // // Note that another crate in the dependency closure might still pull in this // version because that crate has a different set of requirements. + + if matched { + // This is a sneaky optimization. We know that the summaries come in order + // from newest to oldest. 
If our version requirement has matched at least + // once, then it's highly unlikely that a _later_ (i.e., older) version + // will match once the current version does not match. So, we can cut off + // the search early. + // + // The exception to this would be if a crate has a dependency like: + // + // >=1.0 || =0.5 + // + // But that seems highly unlikely. If that happens, it's fine if we don't + // prefetch transitive dependencies of 0.5 -- that'll be handled by load(). + break; + } continue; } + matched = true; let summary = maybe_summary.parse(self.config, &summaries.raw_data, self.source_id)?; From ced491ad72fe32dce66c549065b791f434de3ec7 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 11:20:36 -0800 Subject: [PATCH 58/83] Improve trace output and some comments --- src/cargo/sources/registry/http_remote.rs | 13 ++++++++++--- src/cargo/sources/registry/index.rs | 20 +++++++++++++++++++- src/cargo/sources/registry/mod.rs | 2 +- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index db11f6a7855..8c272bca547 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -493,7 +493,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { .get_mut(token) .expect("invalid token"); + trace!("amending dependency that we're already fetching: {}", name); if let Some(req) = req { + trace!("adding req {}", req); dl.reqs.insert(req.clone()); } dl.is_transitive &= is_transitive; @@ -572,7 +574,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // That thread-local is set up in `next_prefetched` when it calls self.prefetch.perform, // which is what ultimately calls this method. 
handle.write_function(move |buf| { - trace!("{} - {} bytes of data", token, buf.len()); + // trace!("{} - {} bytes of data", token, buf.len()); tls::with(|downloads| { if let Some(downloads) = downloads { downloads.pending[&token] @@ -682,8 +684,13 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if fetched.reqs.is_empty() { // This index file was proactively fetched even though it did not appear as a // dependency, so we should not yield it back for future exploration. + trace!( + "not yielding fetch result for {} with no requirements", + fetched.name + ); continue; } + trace!("yielding fetch result for {}", fetched.name); return Ok(Some(fetched)); } @@ -695,12 +702,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { ); // Note the `tls::set` here which sets up the thread-local storage needed to access // self.downloads from `write_function` and `header_function` above. - let remaining_in_multi = tls::set(&self.downloads, || { + let _remaining_in_multi = tls::set(&self.downloads, || { self.prefetch .perform() .chain_err(|| "failed to perform http requests") })?; - trace!("handles remaining: {}", remaining_in_multi); + // trace!("handles remaining: {}", _remaining_in_multi); // Walk all the messages cURL came across in case anything completed. let results = &mut self.downloads.results; diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index fd931f2da38..3432ab64e81 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -516,14 +516,24 @@ impl<'cfg> RegistryIndex<'cfg> { for pkg in yanked_whitelist { if pkg.source_id() == self.source_id { let name = pkg.name(); + log::trace!("prefetching from lockfile: {}", name); let relative = relative(&*name); load.prefetch(root, &Path::new(&relative), name, None, true)?; } } - // Seed the prefetching with the root dependencies. + // Also seed the prefetching with the root dependencies. 
+ // + // It's important that we do this _before_ we handle any responses to downloads, + // since all the prefetches from above are marked as being transitive. We need to mark + // direct depenendencies as such before we start iterating, otherwise we will erroneously + // ignore their dev-dependencies when they're yielded by next_prefetched. for dep in deps { walked.insert((dep.package_name(), dep.version_req().clone())); + log::trace!( + "prefetching from direct dependencies: {}", + dep.package_name() + ); let relative = relative(&*dep.package_name()); // NOTE: We do not use UncanonicalizedIter here or below because if the user gave a @@ -541,6 +551,7 @@ impl<'cfg> RegistryIndex<'cfg> { // Now, continuously iterate by walking dependencies we've loaded and fetching the index // entry for _their_ dependencies. while let Some(fetched) = load.next_prefetched()? { + log::trace!("got prefetched {}", fetched.name); let summaries = if let Some(s) = self.summaries_cache.get_mut(&fetched.name()) { s } else { @@ -567,6 +578,7 @@ impl<'cfg> RegistryIndex<'cfg> { let mut matched = false; for (version, maybe_summary) in &mut summaries.versions { + log::trace!("consider prefetching version {}", version); if !fetched.version_reqs().any(|vr| vr.matches(&version)) { // The crate that pulled in this crate as a dependency did not care about this // particular version, so we don't need to walk its dependencies. @@ -593,6 +605,7 @@ impl<'cfg> RegistryIndex<'cfg> { // // But that seems highly unlikely. If that happens, it's fine if we don't // prefetch transitive dependencies of 0.5 -- that'll be handled by load(). + log::trace!("stopping version search on first non-match after match"); break; } continue; @@ -620,6 +633,10 @@ impl<'cfg> RegistryIndex<'cfg> { // Don't pull in dev-dependencies of transitive dependencies. 
if fetched.is_transitive && !dep.is_transitive() { + log::trace!( + "not prefetching transitive dev-dependency {}", + dep.package_name() + ); continue; } @@ -628,6 +645,7 @@ impl<'cfg> RegistryIndex<'cfg> { continue; } + log::trace!("prefetching transitive dependency {}", dep.package_name()); let relative = relative(&*dep.package_name()); load.prefetch( root, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 8830a04f1cd..88a16d3c946 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -379,7 +379,7 @@ impl<'a> RegistryDependency<'a> { pub struct Fetched { name: InternedString, path: PathBuf, - // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary + // NOTE: we can get rid of the HashSet (and other complexity) if we had VersionReq::union reqs: HashSet, is_transitive: bool, } From 8098e60fea061800d562cf047d9e253ef3cb648a Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 2 Dec 2020 14:34:11 -0800 Subject: [PATCH 59/83] Remove invalid optimization The index yields versions in chronological order, but 0.2.7 can still be released as a hotfix release after 0.3.0 was released. Similarly, we may depend on something like 1.0 _and_ 2.0 of a crate through different dependency paths. _Maybe_ there's an optimization in there somewhere, but it's not as simple as this. 
--- src/cargo/sources/registry/index.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 3432ab64e81..c9d8725a850 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -576,7 +576,6 @@ impl<'cfg> RegistryIndex<'cfg> { } }; - let mut matched = false; for (version, maybe_summary) in &mut summaries.versions { log::trace!("consider prefetching version {}", version); if !fetched.version_reqs().any(|vr| vr.matches(&version)) { @@ -591,27 +590,9 @@ impl<'cfg> RegistryIndex<'cfg> { // // Note that another crate in the dependency closure might still pull in this // version because that crate has a different set of requirements. - - if matched { - // This is a sneaky optimization. We know that the summaries come in order - // from newest to oldest. If our version requirement has matched at least - // once, then it's highly unlikely that a _later_ (i.e., older) version - // will match once the current version does not match. So, we can cut off - // the search early. - // - // The exception to this would be if a crate has a dependency like: - // - // >=1.0 || =0.5 - // - // But that seems highly unlikely. If that happens, it's fine if we don't - // prefetch transitive dependencies of 0.5 -- that'll be handled by load(). 
- log::trace!("stopping version search on first non-match after match"); - break; - } continue; } - matched = true; let summary = maybe_summary.parse(self.config, &summaries.raw_data, self.source_id)?; From 3f746b318ec9980b6c3ca2af396d08e3d189216f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 09:19:35 -0800 Subject: [PATCH 60/83] Fetch config.json from the right directory --- src/cargo/sources/registry/http_remote.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 8c272bca547..f8cbb6e27d2 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1054,9 +1054,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn config(&mut self) -> CargoResult> { debug!("loading config"); self.prepare()?; - self.config.assert_package_cache_locked(&self.index_path); + let path = self + .config + .assert_package_cache_locked(&self.index_path) + .to_path_buf(); let mut config = None; - self.load(Path::new(""), Path::new("config.json"), &mut |json| { + self.load(&path, Path::new("config.json"), &mut |json| { config = Some(serde_json::from_slice(json)?); Ok(()) })?; From 3a8b69d3a5fbdd83d3dd6bba4bc78217003e3c34 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 09:41:52 -0800 Subject: [PATCH 61/83] Fix up alt_registry tests with registry+ prefix --- tests/testsuite/alt_registry.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testsuite/alt_registry.rs b/tests/testsuite/alt_registry.rs index 748d5dcaaa5..cd9038a8967 100644 --- a/tests/testsuite/alt_registry.rs +++ b/tests/testsuite/alt_registry.rs @@ -795,7 +795,7 @@ fn alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" }, { "name": "iodep", @@ -948,7 +948,7 @@ fn 
alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" }, { "name": "iodep", @@ -997,7 +997,7 @@ fn alt_reg_metadata() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" } ], "targets": "{...}", @@ -1092,7 +1092,7 @@ fn unknown_registry() { "uses_default_features": true, "features": [], "target": null, - "registry": "file:[..]/alternative-registry" + "registry": "registry+file:[..]/alternative-registry" } ], "targets": "{...}", From fa296f0a8fce8366a5f0c1565326abb90cab19ba Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 14:09:48 -0800 Subject: [PATCH 62/83] Treat 403 as 404 from a privacy-sensitive server --- src/cargo/sources/registry/http_remote.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index f8cbb6e27d2..1b1c2a5da5f 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -803,8 +803,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { "download finished for already-finished path" ); } - 404 => { + 403 | 404 => { // Not Found response. + // We treat Forbidden as just being another expression for 404 + // from a server that does not want to reveal file names. // The crate doesn't exist, so we simply do not yield it. // Errors will eventually be yielded by load(). } @@ -1028,7 +1030,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { was.expect("conditional request response implies we have local index file"); return data(bytes); } - 404 | 410 | 451 => { + 403 | 404 | 410 | 451 => { // The crate was deleted from the registry. if was.is_some() { // Make sure we delete the local index file. 
@@ -1250,7 +1252,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { Some(b) => b, } } - 404 => { + 403 | 404 => { // The server does not have a changelog. if self.at.get().0.is_synchronized() { // We used to have a changelog, but now we don't. It's important that we From 88649de7e2865de7c94eeb8bc55a334b0927fa13 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 14:10:14 -0800 Subject: [PATCH 63/83] Be more helpful about HTTP status code errors --- src/cargo/sources/registry/http_remote.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 1b1c2a5da5f..44f3a41e64d 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -816,7 +816,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { todo!("we should delete the local index file here if it exists"); } code => { - anyhow::bail!("server returned unexpected HTTP status code {}", code); + anyhow::bail!( + "prefetch: server returned unexpected HTTP status code {} for {}{}", + code, + self.source_id.url(), + fetched.path.display() + ); } } } @@ -1040,7 +1045,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!("crate has been deleted from the registry"); } code => { - anyhow::bail!("server returned unexpected HTTP status code {}", code); + anyhow::bail!( + "load: server returned unexpected HTTP status code {} for {}{}", + code, + self.source_id.url(), + path.display() + ); } } @@ -1298,7 +1308,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } } code => { - anyhow::bail!("server returned unexpected HTTP status code {}", code); + anyhow::bail!( + "server returned unexpected HTTP status code {} for changelog", + code + ); } }; From bda120ad837e6e71edb334a44e64533119402dee Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 14:37:52 -0800 Subject: [PATCH 64/83] Remove the changelog optimization 
See https://github.com/rust-lang/rfcs/pull/2789#issuecomment-738194824 --- crates/cargo-test-support/src/registry.rs | 96 +--- src/cargo/sources/registry/http_remote.rs | 662 ++-------------------- tests/testsuite/http_registry.rs | 290 ++-------- 3 files changed, 105 insertions(+), 943 deletions(-) diff --git a/crates/cargo-test-support/src/registry.rs b/crates/cargo-test-support/src/registry.rs index 0555524c1b2..60fb78504a1 100644 --- a/crates/cargo-test-support/src/registry.rs +++ b/crates/cargo-test-support/src/registry.rs @@ -218,13 +218,6 @@ pub fn init() { ); } -#[derive(Debug, Copy, Clone)] -pub enum RegistryServerConfiguration { - NoChangelog, - WithChangelog, - ChangelogNoRange, -} - pub struct RegistryServer { done: Arc, server: Option>, @@ -246,18 +239,13 @@ impl Drop for RegistryServer { } #[must_use] -pub fn serve_registry( - registry_path: PathBuf, - config: RegistryServerConfiguration, -) -> RegistryServer { +pub fn serve_registry(registry_path: PathBuf) -> RegistryServer { let listener = TcpListener::bind("127.0.0.1:0").unwrap(); let addr = listener.local_addr().unwrap(); let done = Arc::new(AtomicBool::new(false)); let done2 = done.clone(); let t = thread::spawn(move || { - let support_range = !matches!(config, RegistryServerConfiguration::ChangelogNoRange); - let mut line = String::new(); 'server: while !done2.load(Ordering::SeqCst) { let (socket, _) = listener.accept().unwrap(); @@ -282,16 +270,8 @@ pub fn serve_registry( ); let file = registry_path.join(path); - let mut exists = file.exists(); - if file.ends_with("changelog") - && matches!(config, RegistryServerConfiguration::NoChangelog) - { - exists = false; - } - - if exists { + if file.exists() { // Grab some other headers we may care about. 
- let mut range = None; let mut if_modified_since = None; let mut if_none_match = None; loop { @@ -313,20 +293,7 @@ pub fn serve_registry( .map(|v| v.trim()) .unwrap(); - if line.starts_with("Range:") { - let value = value.strip_prefix("bytes=").unwrap_or(value); - if !value.is_empty() { - let mut parts = value.split('-'); - let start = parts.next().unwrap().parse::().unwrap(); - let end = parts.next().unwrap(); - let end = if end.is_empty() { - None - } else { - Some(end.parse::().unwrap()) - }; - range = Some((start, end)); - } - } else if line.starts_with("If-Modified-Since:") { + if line.starts_with("If-Modified-Since:") { if_modified_since = Some(value.to_owned()); } else if line.starts_with("If-None-Match:") { if_none_match = Some(value.trim_matches('"').to_owned()); @@ -356,47 +323,14 @@ pub fn serve_registry( any_match = true; } } - if any_match { - assert!(range.is_none()); - } // Write out the main response line. - let data_len = data.len(); - let mut data = &data[..]; if any_match && all_match { buf.get_mut() .write_all(b"HTTP/1.1 304 Not Modified\r\n") .unwrap(); - } else if range.is_none() || !support_range { + } else { buf.get_mut().write_all(b"HTTP/1.1 200 OK\r\n").unwrap(); - } else if let Some((start, end)) = range { - if start >= data.len() - || end.unwrap_or(0) >= data.len() - || end.unwrap_or(start) <= start - { - buf.get_mut() - .write_all(b"HTTP/1.1 416 Range Not Satisfiable\r\n") - .unwrap(); - } else { - buf.get_mut() - .write_all(b"HTTP/1.1 206 Partial Content\r\n") - .unwrap(); - - // Slice the data as requested and include a header indicating that. - // Note that start and end are both inclusive! - data = &data[start..=end.unwrap_or(data_len - 1)]; - buf.get_mut() - .write_all( - format!( - "Content-Range: bytes {}-{}/{}\r\n", - start, - end.unwrap_or(data_len - 1), - data_len - ) - .as_bytes(), - ) - .unwrap(); - } } // TODO: Support 451 for crate index deletions. 
@@ -413,7 +347,7 @@ pub fn serve_registry( // And finally, write out the body. buf.get_mut().write_all(b"\r\n").unwrap(); - buf.get_mut().write_all(data).unwrap(); + buf.get_mut().write_all(&data).unwrap(); } else { loop { line.clear(); @@ -684,26 +618,6 @@ impl Package { t!(fs::create_dir_all(dst.parent().unwrap())); t!(fs::write(&dst, prev + &line[..] + "\n")); - // Update changelog. - let dst = registry_path.join("changelog"); - t!(fs::create_dir_all(dst.parent().unwrap())); - let mut epoch = 1; - if dst.exists() { - // Fish out the current epoch. - let prev = fs::read_to_string(&dst).unwrap_or_default(); - let e = prev.split_whitespace().next().unwrap(); - if !e.is_empty() { - epoch = e.parse::().unwrap(); - } - } - let mut changelog = t!(fs::OpenOptions::new().append(true).create(true).open(dst)); - t!(writeln!( - changelog, - "{} 2020-11-20 16:54:07 {}", - epoch, self.name - )); - t!(changelog.flush()); - // Add the new file to the index. if !self.local { let repo = t!(git2::Repository::open(®istry_path)); diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 44f3a41e64d..652ad4fa635 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -17,7 +17,7 @@ use crate::util::{self, Config, Filesystem, Progress, ProgressStyle, Sha256}; use bytesize::ByteSize; use curl::easy::{Easy, HttpVersion, List}; use curl::multi::{EasyHandle, Multi}; -use log::{debug, trace, warn}; +use log::{debug, trace}; use std::cell::{Cell, RefCell, RefMut}; use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Write as FmtWrite; @@ -32,70 +32,6 @@ use std::time::Instant; const ETAG: &'static [u8] = b"ETag"; const LAST_MODIFIED: &'static [u8] = b"Last-Modified"; -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -/// The last known state of the changelog. -enum ChangelogState { - /// The server does not host a changelog. 
- /// - /// In this state, we must double-check with the server every time we want to load an index - /// file in case that file has changed upstream. - Unsupported, - - /// The server served us a changelog in the past. - Synchronized { - /// The last known changelog epoch (see the RFC). - /// - /// The epoch allows the server to start the changelog over for garbage-collection purposes - /// in a way that the client can detect. - epoch: usize, - - /// The last known length of the changelog (in bytes). - /// - /// This is used to efficiently fetch only the suffix of the changelog that has been - /// appended since we last read it. - length: usize, - }, -} - -impl ChangelogState { - fn is_synchronized(&self) -> bool { - matches!(self, ChangelogState::Synchronized { .. }) - } -} - -impl Into<(ChangelogState, InternedString)> for ChangelogState { - fn into(self) -> (ChangelogState, InternedString) { - let is = InternedString::from(self.to_string()); - (self, is) - } -} - -impl std::str::FromStr for ChangelogState { - type Err = &'static str; - - fn from_str(s: &str) -> Result { - if s == "unsupported" { - return Ok(ChangelogState::Unsupported); - } - - let mut parts = s.split('.'); - let epoch = parts.next().expect("split always yields one item"); - let epoch = usize::from_str_radix(epoch, 10).map_err(|_| "invalid epoch")?; - let length = parts.next().ok_or("no changelog offset")?; - let length = usize::from_str_radix(length, 10).map_err(|_| "invalid changelog offset")?; - Ok(ChangelogState::Synchronized { epoch, length }) - } -} - -impl ToString for ChangelogState { - fn to_string(&self) -> String { - match *self { - ChangelogState::Unsupported => String::from("unsupported"), - ChangelogState::Synchronized { epoch, length } => format!("{}.{}", epoch, length), - } - } -} - /// A registry served by the HTTP-based registry API. /// /// This type is primarily accessed through the [`RegistryData`] trait. 
@@ -109,15 +45,11 @@ impl ToString for ChangelogState { /// Implemented naively, this leads to a significant amount of network traffic, as a lookup of any /// index file would need to check with the remote backend if the index file has changed. This /// cost is somewhat mitigated by the use of HTTP conditional feches (`If-Modified-Since` and -/// `If-None-Match` for `ETag`s) which can be efficiently handled by HTTP/2, but it's still not -/// ideal. The RFC therefor also introduces the (optional) notion of a _changelog_. The changelog -/// is a dedicated append-only file on the server that lists every crate index change. This allows -/// the client to fetch the changelog, invalidate its locally cached index files for only the -/// changed crates, and then not worry about double-checking with the server for each index file. +/// `If-None-Match` for `ETag`s) which can be efficiently handled by HTTP/2. /// /// In order to take advantage of HTTP/2's ability to efficiently send multiple concurrent HTTP -/// requests over a single connection, `HttpRegistry` also supports asynchronous prefetching. The -/// caller queues up a number of index files they think it is likely they will want to access, and +/// requests over a single connection, `HttpRegistry` supports asynchronous prefetching. The caller +/// queues up a number of index files they think it is likely they will want to access, and /// `HttpRegistry` fires off requests for each one without synchronously waiting for the response. /// The caller then drives the processing of the responses, which update the index files that are /// stored on disk, before moving on to the _actual_ dependency resolution. See @@ -130,13 +62,7 @@ pub struct HttpRegistry<'cfg> { source_id: SourceId, config: &'cfg Config, - /// The current (last known) state of the changelog. - at: Cell<(ChangelogState, InternedString)>, - - /// Have we loaded self.at from .last-updated (by calling prepare) yet? 
- checked_for_at: Cell, - - /// Cached HTTP handle for synchronous requests (changelog + RegistryData::load). + /// Cached HTTP handle for synchronous requests (RegistryData::load). http: RefCell>, /// HTTP multi-handle for asynchronous/parallel requests during prefetching. @@ -233,8 +159,6 @@ impl<'cfg> HttpRegistry<'cfg> { cache_path: config.registry_cache_path().join(name), source_id, config, - at: Cell::new(ChangelogState::Unsupported.into()), - checked_for_at: Cell::new(false), http: RefCell::new(None), prefetch: Multi::new(), multiplexing: false, @@ -306,23 +230,6 @@ const LAST_UPDATED_FILE: &str = ".last-updated"; impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { - // Load last known changelog state from LAST_UPDATED_FILE. - if !self.checked_for_at.get() { - self.checked_for_at.set(true); - let path = self.config.assert_package_cache_locked(&self.index_path); - let path = path.join(LAST_UPDATED_FILE); - if path.exists() { - let cl_state = paths::read(&path)?; - let cl_state: ChangelogState = cl_state - .parse() - .map_err(|e| anyhow::anyhow!("{}", e)) - .chain_err(|| { - format!("failed to parse last changelog state: '{}'", cl_state) - })?; - self.at.set(cl_state.into()); - } - } - if !self.config.offline() { let mut http = if let Ok(h) = self.http.try_borrow_mut() { h @@ -397,27 +304,22 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // // We first check if we have a local copy of the given index file. // - // If we do, and the server has a changelog, then we know that the index file is up to - // date (as of when we last checked the changelog), so there's no need to double-check with - // the server that the file isn't stale. We can just tell the next call to - // `next_prefetched` to go ahead with this path immediately. If we _need_ a newer version - // of it, `update_index` will be called and then `prefetch` will be called again. 
+ // If we don't have a local copy of the index file, we obviously need to fetch it from the + // server. // - // If we do, but the server does not have a changelog, we need to check with the server if - // the index file has changed upstream. We do this using a conditional HTTP request using - // the `Last-Modified` and `ETag` headers we got when we fetched the currently cached index - // file (those headers are stored in the first two lines of each index file). That way, if - // nothing has changed (likely the common case), the server doesn't have to send us - // any data, just a 304 Not Modified. - // - // If we don't have a local copy of the index file, we need to fetch it from the server. + // If we do, we may need to check with the server if the index file has changed upstream. + // This happens if cargo has explicitly requested that we fetch the _latest_ versions of + // dependencies. We do this using a conditional HTTP request using the `Last-Modified` and + // `ETag` headers we got when we fetched the currently cached index file (those headers are + // stored in the first two lines of each index file). That way, if nothing has changed + // (likely the common case), the server doesn't have to send us any data, just a 304 Not + // Modified. + let pkg = root.join(path); let bytes; + // TODO: Can we avoid this file-system interaction if we're already downloading? let was = if pkg.exists() { - if self.at.get().0.is_synchronized() - || !self.requested_update - || self.fresh.contains(path) - { + if !self.requested_update || self.fresh.contains(path) { let req = if let Some(req) = req { req } else { @@ -429,10 +331,10 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { trace!("not prefetching fresh {}", name); // We already have this file locally, and we don't need to double-check it with - // upstream because we have a changelog, or because the client hasn't requested an - // index update. So there's really nothing to prefetch. 
We do keep track of the - request though so that we will eventually yield this back to the caller who may - then want to prefetch other transitive dependencies. + upstream because the client hasn't requested an index update. So there's really + nothing to prefetch. We do keep track of the request though so that we will + eventually yield this back to the caller who may then want to prefetch other + transitive dependencies. use std::collections::btree_map::Entry; match self.downloads.eager.entry(path.to_path_buf()) { Entry::Occupied(mut o) => { @@ -863,12 +765,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } fn current_version(&self) -> Option { - let cl_state = self.at.get(); - if cl_state.0.is_synchronized() { - Some(cl_state.1) - } else { - None - } + // TODO: Can we use the time of the last call to update_index here? + None } fn load( @@ -878,7 +776,9 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()> { // NOTE: This is pretty much a synchronous version of the prefetch() + next_prefetched() - // dance. Much of the code is sort-of duplicated, which isn't great, but it works. + // dance. Much of the code is sort-of duplicated, which isn't great, but it's moderately + // straightforward and works. When the real resolver supports a load returning "not yet", + // load and prefetch can be merged. 
let pkg = root.join(path); let bytes; @@ -900,23 +800,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!("index file is missing HTTP header header"); }; - let is_synchronized = self.at.get().0.is_synchronized(); - - let is_fresh = if is_synchronized { - if self.requested_update - && path.ends_with("config.json") - && !self.fresh.contains(path) - { - debug!("double-checking freshness of config.json on update"); - false - } else { - trace!( - "using local {} as changelog is synchronized", - path.display() - ); - true - } - } else if !self.requested_update { + let is_fresh = if !self.requested_update { trace!( "using local {} as user did not request update", path.display() @@ -1099,498 +983,32 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { return Ok(()); } - // NOTE: We check for the changelog even if the server did not previously have a changelog - // in case it has wisened up since then. - - debug!("updating the index"); - - // Make sure that subsequent loads double-check with the server again. - self.requested_update = true; - self.fresh.clear(); - - self.prepare()?; let path = self.config.assert_package_cache_locked(&self.index_path); self.config .shell() .status("Updating", self.source_id.display_index())?; - let url = self.source_id.url(); - let mut handle = self.http()?; - handle.url(&format!("{}/changelog", url))?; - - // TODO: Retry logic using network::with_retry? - - /// How are we attempting to fetch the changelog? - #[derive(Debug, Copy, Clone)] - enum ChangelogStrategy { - /// We are fetching the changelog with no historical context. - FirstFetch { full: bool }, - /// We are trying to follow the changelog to update our view of the index. 
- Follow { epoch: usize, length: usize }, - } - let mut plan = if let ChangelogState::Synchronized { epoch, length } = self.at.get().0 { - ChangelogStrategy::Follow { epoch, length } - } else { - ChangelogStrategy::FirstFetch { full: false } - }; - - // NOTE: Loop in case of rollover, in which case we need to fetch it starting at byte 0. - let was = self.at.get(); - 'changelog: loop { - // Reset in case we looped. - handle.range("")?; - handle.resume_from(0)?; - - match plan { - ChangelogStrategy::Follow { length, .. } => { - handle.resume_from(length as u64)?; - } - ChangelogStrategy::FirstFetch { full: false } => { - // We really just need the epoch number and file size, - // which we can get at by fetching just the first line. - // "1 2019-10-18T23:51:23Z ".len() == 23 - handle.range("0-23")?; - } - ChangelogStrategy::FirstFetch { full: _ } => {} - } - - let mut contents = Vec::new(); - let mut total_bytes = None; - let mut transfer = handle.transfer(); - transfer.write_function(|buf| { - contents.extend_from_slice(buf); - Ok(buf.len()) - })?; - - // Extract `Content-Range` header to learn the total size of the changelog. - // - // We need the total size from `Content-Range` since we only fetch a very small subset - // of the changelog when we first access the server (just enought to get the epoch). - transfer.header_function(|buf| { - const CONTENT_RANGE: &'static [u8] = b"Content-Range:"; - if buf.len() > CONTENT_RANGE.len() - && buf[..CONTENT_RANGE.len()].eq_ignore_ascii_case(CONTENT_RANGE) - { - let mut buf = &buf[CONTENT_RANGE.len()..]; - - // Trim leading whitespace. - while !buf.is_empty() && buf[0] == b' ' { - buf = &buf[1..]; - } - - // Check that the Content-Range unit is indeed bytes. - const BYTES_UNIT: &'static [u8] = b"bytes "; - if !buf.starts_with(BYTES_UNIT) { - return true; - } - buf = &buf[BYTES_UNIT.len()..]; - - // Extract out the total length. 
- let rest = buf.splitn(2, |&c| c == b'/'); - if let Some(complete_length) = rest.skip(1 /* byte-range */).next() { - if complete_length.starts_with(b"*") { - // The server does not know the total size of the changelog. - // This seems weird, but not much we can do about it. - // We'll end up falling back to a full fetch. - return true; - } - let complete_length = complete_length - .splitn(2, |&c| c == b' ') - .next() - .expect("split always yields >= 1 element"); - if complete_length.into_iter().all(|c| c.is_ascii_digit()) { - let complete_length = - std::str::from_utf8(complete_length).expect("only ascii digits"); - total_bytes = Some( - usize::from_str_radix(complete_length, 10) - .expect("ascii digits make for valid numbers"), - ); - } - } - } - true - })?; - - transfer - .perform() - .chain_err(|| format!("failed to fetch index changelog from `{}`", url))?; - drop(transfer); - - let mut contents = &contents[..]; - let total_bytes = match handle.response_code()? { - 200 => { - // The server does not support Range: requests, - // so we need to manually slice the bytes we got back. - // - // TODO: This is a really bad operating state! We're fetching the _entire_ - // changelog each time we update the changelog. Not clear if that's better than - // just validating each index lookup? - let total_bytes = contents.len(); - if let ChangelogStrategy::Follow { length, .. } = plan { - if contents.len() < length || contents.len() == 0 { - // The changelog must have rolled over. - // Luckily, since the server sent the whole response, - // we can just continue as if that was our plan all along. - plan = ChangelogStrategy::FirstFetch { full: true }; - } else { - contents = &contents[length..]; - } - } - total_bytes - } - 206 => { - // 206 Partial Content -- this is what we expect to get. - match total_bytes { - None => { - // The server sent us back only the byte range we asked for, - // but it did not inform us of the total size of the changelog. 
- // This is fine if we're just following the changelog, since we can - // compute the total size (old size + size of content), but if we're - // trying to _start_ following the changelog, we need to know its - // current size to know where to fetch from next time! - match plan { - ChangelogStrategy::FirstFetch { full } => { - assert!(!full, "got partial response without Range:"); - - // Our only recourse is to fetch the full changelog. - plan = ChangelogStrategy::FirstFetch { full: true }; - continue; - } - ChangelogStrategy::Follow { length, .. } => length + contents.len(), - } - } - Some(b) => b, - } - } - 403 | 404 => { - // The server does not have a changelog. - if self.at.get().0.is_synchronized() { - // We used to have a changelog, but now we don't. It's important that we - // record that fact so that later calls to load() will all double-check - // with the server. - self.at.set(ChangelogState::Unsupported.into()); - } - break 'changelog; - } - 416 => { - // 416 Range Not Satisfiable - // - // This can mean one of two things: - // - // 1. The changelog has rolled over, so we requested too much data. - // 2. There are no new entries (our request goes beyond the end of the - // changelog). - // - // If we hit case 1, we need to fetch the start of the new changelog instead. - // If we hit case 2, what we'd like to do is, well, nothing. - match (plan, total_bytes) { - (ChangelogStrategy::Follow { length, .. }, Some(total_bytes)) - if length == total_bytes => - { - contents = &[]; - total_bytes - } - // We must assume we're in case 1. - (ChangelogStrategy::FirstFetch { full }, _) => { - // Our request for just the start of the changelog (Range: 0-23) failed. - // This probably means that the changelog is empty, but we do a full fetch - // to make sure. - assert!(!full); - plan = ChangelogStrategy::FirstFetch { full: true }; - continue; - } - (ChangelogStrategy::Follow { .. 
}, _) => { - // We requested a byte range past the end of the changelog, which - // implies that it must have rolled over (and shrunk). - plan = ChangelogStrategy::FirstFetch { full: false }; - continue; - } - } - } - code => { - anyhow::bail!( - "server returned unexpected HTTP status code {} for changelog", - code - ); - } - }; - - if contents.len() == 0 { - if total_bytes == 0 { - // We can't use the changelog, since we don't know its epoch. - self.at.set(ChangelogState::Unsupported.into()); - } else { - // There are no changes in changelog, so there's supposedly nothing to update. - // - // TODO: This isn't fool-proof. It _could_ be that the changelog rolled over, - // and just so happens to be exactly the same length as the old changelog was - // last time we checked it. This is quite unlikely, but not impossible. To fix - // this, we should keep track of ETag + Last-Modified, and check that here. If - // they do not match, then fall back to a ::FirstFetch. - } - break; - } - - enum WhatLine { - First, - Second { first_failed: bool }, - Later, - } - let mut at = WhatLine::First; - - let mut line = String::new(); - let mut new_changelog = false; - let mut fetched_epoch = None; - while contents.read_line(&mut line)? != 0 { - // First, make sure that the line is a _complete_ line. - // It's possible that the changelog rolled over, _but_ our old range was still - // valid. In that case, the returned content may not start at a line bounary, and - // parsing will fail in weird ways. Or worse yet, succeed but with an incorrect - // epoch number! Should that happen, we need to detect it. - // - // Lines _should_ look like this: - // 1 2019-10-18T23:52:00Z anyhow - // - // That is: epoch date time crate. - let mut parts = line.trim().split_whitespace(); - let epoch = parts.next().expect("split always has one element"); - let krate = parts.skip(2).next(); - - if epoch.is_empty() { - // Skip empty lines. 
- - // We _have_ observed a line change though, - // so the next epoch read is guaranteed to read a complete epoch. - if let WhatLine::First = at { - at = WhatLine::Second { - first_failed: false, - }; - } - continue; - } - - let epoch = if let Ok(epoch) = epoch.parse::() { - fetched_epoch = Some(epoch); - epoch - } else if let WhatLine::First = at { - // The line is clearly not valid. - // - // This means the changelog rolled over. Unfortunately, the byte range we - // requested does not contain the epoch, so we don't have enough information to - // move forwards. We need to parse one more line. - - // If we got here during a first fetch (which fetches starting at byte 0), the - // server's changelog is entirely bad. - if let ChangelogStrategy::FirstFetch { .. } = plan { - warn!("server changelog does not begin with an epoch"); - // Ensure that all future index fetches check with server - self.at.set(ChangelogState::Unsupported.into()); - break 'changelog; - } - - debug!( - "index {} changelog has invalid first line; assuming rollover", - url - ); - at = WhatLine::Second { first_failed: true }; - continue; - } else { - warn!("index {} changelog has invalid lines", url); - // Ensure that all future index fetches check with server - self.at.set(ChangelogState::Unsupported.into()); - break 'changelog; - }; - - match plan { - ChangelogStrategy::FirstFetch { .. } => { - // This requested bytes starting at 0, so the epoch we parsed out is valid. - - // We don't actually care about the remainder of the changelog, - // since we've completely purged our local index. - new_changelog = true; - at = WhatLine::Later; - break; - } - ChangelogStrategy::Follow { - epoch: last_epoch, .. - } if last_epoch != epoch => { - // There has clearly been a rollover, though we have to be a little - // careful. Since we requested a particular byte offset, the parsed epoch - // may not actually have been the "true" epoch. 
Imagine that we fetched: - // - // 1 2019-10-18 23:52:00 anyhow - // - // it _could_ be that that's just an unfortunate slice of this line: - // - // 21 2019-10-18 23:52:00 anyhow - // - // So, we need to parse a second line to ensure we have the _true_ line. - if let WhatLine::First = at { - at = WhatLine::Second { first_failed: true }; - continue; - } - - debug!("index {} changelog has rolled over", url); - - // TODO: Try previous changelog if available? - // https://github.com/rust-lang/rfcs/pull/2789#issuecomment-730024821 - - // We're starting over with this new, rolled-over changelog, so we don't - // care about its contents. - new_changelog = true; - at = WhatLine::Later; - break; - } - ChangelogStrategy::Follow { .. } => {} - } - - at = match at { - WhatLine::First => WhatLine::Second { - first_failed: false, - }, - WhatLine::Second { first_failed: true } => { - // If the first line failed to parse, that must mean there was a rollover. - // If we get here, that means that we're in ::Follow mode, but that the - // next line had an epoch that _did_ match our own epoch, which would imply - // there _wasn't_ a rollover. Something is _very_ wrong. - unreachable!("server response byte offset mismatch"); - } - WhatLine::Second { first_failed: _ } | WhatLine::Later => WhatLine::Later, - }; - - let krate = if let Some(krate) = krate { - krate - } else { - warn!("index {} changelog has an invalid line: {}", url, line); - - // We could error out here, but it's always safe for us to ignore the changelog - // and just double-check all index file loads instead, so we prefer that. - self.at.set(ChangelogState::Unsupported.into()); - break 'changelog; - }; - - if krate.is_empty() { - warn!("index {} changelog has an invalid line: {}", url, line); - - // Same as above -- prefer working to failing. 
- self.at.set(ChangelogState::Unsupported.into()); - break 'changelog; - } - - // Remove the outdated index file -- we'll have to re-fetch it - let path = path.join(&Path::new(&make_dep_prefix(krate))).join(krate); - if path.exists() { - paths::remove_file(path)?; - } - } - - if let WhatLine::Second { first_failed } = at { - let (epoch, length) = if let ChangelogStrategy::Follow { epoch, length } = plan { - (epoch, length) - } else { - unreachable!("::FirstFetch always breaks on the first line"); - }; - - if first_failed { - // The changelog must have rolled over. This means that whatever we got in - // `fetched_epoch` may not be valid due to weird byte offsets. Unfortunately, - // we never got a second line to ensure we parsed a complete epoch either! Our - // only option here is to do another request to the server for the start of the - // changelog. - plan = ChangelogStrategy::FirstFetch { full: false }; - continue; - } - - // There is a _slight_ chance that there was a rollover, and that the - // byte offset we provided happened to be valid, and happened to perfectly - // align so that the string starts with a number that just so happens to be - // the same as the old epoch. That's... weird, but possible. - // - // Basically, imagine that the previous epoch we knew about was 3, and the first - // (and only) line we got in the changelog diff we requested was: - // - // 3 2019-10-18 23:52:00 anyhow - // - // All good, right? Well, not _quite_. - // What if that is just a weird slicing of this line: - // - // 13 2019-10-18 23:52:00 anyhow - // - // And since there was no second line, we never saw epoch 13, and just kept going - // as if everything is fine. To make absolutely sure, we do another fetch of the - // changelog that includes some earlier data as well. That fetch should get more - // than one line, and so detect any such epoch shenanigans. - plan = ChangelogStrategy::Follow { - epoch, - // How far back we go here isn't super important. 
We just have to make sure we - // go at least one line back, so that the response will include at least two - // lines. The longer back we go, the more index entries we will unnecessarily - // invalidate. If we don't go far enough, we'll just end up in this clause - // again and do another round trip to go further back. - length: length.saturating_sub(16), - }; - continue; - } - - let epoch = - fetched_epoch.expect("changelog was non-empty, and epoch parsing didn't fail"); - - if new_changelog { - debug!( - "index {} is at epoch {} (offset: {})", - url, epoch, total_bytes - ); - - // We don't know which index entries are now invalid and which are not, - // so we have to purge them all. - // - // TODO: Will this cause issues with directory locking? - if path.exists() { - paths::remove_dir_all(&path)?; - paths::create_dir_all(&path)?; - } - - // From this point forward, we're synchronized with the changelog! - self.at.set( - ChangelogState::Synchronized { - epoch, - length: total_bytes, - } - .into(), - ); - } else { - // Keep track of our new byte offset into the changelog. - self.at.set( - ChangelogState::Synchronized { - epoch, - length: total_bytes, - } - .into(), - ); - } - break; - } - - // Reset the http handle for later requests that re-use the Easy. - handle.range("")?; - handle.resume_from(0)?; + // Actually updating the index is more or less a no-op for this implementation. + // All it does is ensure that a subsequent load/prefetch will double-check files with the + // server rather than rely on a locally cached copy of the index files. + debug!("updating the index"); + self.requested_update = true; + self.fresh.clear(); self.config.updated_sources().insert(self.source_id); - // Record the latest known state of the index if it changed. 
- let lu_file = path.join(LAST_UPDATED_FILE); - if !lu_file.exists() || was != self.at.get() { - if !path.exists() { - paths::create_dir_all(&path)?; - } - let mut file = paths::create(&lu_file)?; - file.write_all(self.at.get().1.as_bytes())?; - file.flush()?; + // Create a dummy file to record the mtime for when we updated the + // index. + if !path.exists() { + paths::create_dir_all(&path)?; } + paths::create(&path.join(LAST_UPDATED_FILE))?; Ok(()) } + // NOTE: What follows is identical to remote.rs + fn download(&mut self, pkg: PackageId, _checksum: &str) -> CargoResult { let filename = self.filename(pkg); diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 551330b2860..7ea5a2f8b59 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -7,7 +7,7 @@ use cargo_test_support::paths::{self, CargoPathExt}; use cargo_test_support::registry::{ - registry_path, serve_registry, Dependency, Package, RegistryServer, RegistryServerConfiguration, + registry_path, serve_registry, Dependency, Package, RegistryServer, }; use cargo_test_support::t; use cargo_test_support::{basic_manifest, project}; @@ -21,8 +21,8 @@ fn cargo(p: &cargo_test_support::Project, s: &str) -> cargo_test_support::Execs e } -fn setup(config: RegistryServerConfiguration) -> RegistryServer { - let server = serve_registry(registry_path(), config); +fn setup() -> RegistryServer { + let server = serve_registry(registry_path()); let root = paths::root(); t!(fs::create_dir(&root.join(".cargo"))); @@ -44,27 +44,9 @@ fn setup(config: RegistryServerConfiguration) -> RegistryServer { server } -macro_rules! 
test_w_wo_changelog { - ($name:ident) => { - mod $name { - use super::{$name, RegistryServerConfiguration}; - - #[cargo_test] - fn no_changelog() { - $name(RegistryServerConfiguration::NoChangelog); - } - - #[cargo_test] - fn changelog() { - $name(RegistryServerConfiguration::WithChangelog); - } - } - }; -} - -test_w_wo_changelog!(simple); -fn simple(config: RegistryServerConfiguration) { - let server = setup(config); +#[cargo_test] +fn simple() { + let server = setup(); let url = format!("http://{}/", server.addr()); let p = project() .file( @@ -114,9 +96,9 @@ fn simple(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(deps); -fn deps(config: RegistryServerConfiguration) { - let server = setup(config); +#[cargo_test] +fn deps() { + let server = setup(); let url = format!("http://{}/", server.addr()); let p = project() .file( @@ -155,9 +137,9 @@ fn deps(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(nonexistent); -fn nonexistent(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn nonexistent() { + let _server = setup(); Package::new("init", "0.0.1").publish(); let p = project() @@ -190,9 +172,9 @@ required by package `foo v0.0.1 ([..])` .run(); } -test_w_wo_changelog!(update_registry); -fn update_registry(config: RegistryServerConfiguration) { - let server = setup(config); +#[cargo_test] +fn update_registry() { + let server = setup(); let url = format!("http://{}/", server.addr()); Package::new("init", "0.0.1").publish(); @@ -241,161 +223,9 @@ required by package `foo v0.0.1 ([..])` .run(); } -test_w_wo_changelog!(invalidate_index_on_rollover); -fn invalidate_index_on_rollover(config: RegistryServerConfiguration) { - let server = setup(config); - let url = format!("http://{}/", server.addr()); - - // First generate a Cargo.lock and a clone of the registry index at the - // "head" of the current registry. 
- let p = project() - .file( - "Cargo.toml", - r#" - [project] - name = "foo" - version = "0.5.0" - authors = [] - - [dependencies] - a = "0.1.0" - "#, - ) - .file("src/main.rs", "fn main() {}") - .build(); - Package::new("a", "0.1.0").publish(); - cargo(&p, "build").run(); - - // Fish out the path to the .last-updated file - let last_updated = if !matches!(config, RegistryServerConfiguration::NoChangelog) { - let dir = fs::read_dir(paths::home().join(".cargo/registry/index/")) - .unwrap() - .last() - .unwrap() - .unwrap(); - - Some(dir.path().join(".last-updated")) - } else { - None - }; - - if let Some(last_updated) = &last_updated { - // Check the contents of the last-updated file to see that it's on epoch 1. - assert_eq!( - fs::read_to_string(last_updated).unwrap(), - format!("1.{}", "1 YYYY-MM-DD HH:MM:SS a\n".len()), - "{}", - last_updated.display() - ); - } - - // Next, publish a new version and make the changelog roll over - Package::new("a", "0.1.1").publish(); - assert!(registry_path().join("changelog").exists(),); - fs::write( - registry_path().join("changelog"), - b"2 2020-11-23 09:45:09 a\n", - ) - .unwrap(); - - // Now, try to build a project that relies on the newly published version. - // It should realize it's not in cache, and update the registry. - // The registry should detect the rollover, invalidate the cache, - // and then succeed in fetching 0.1.1. - let p2 = project() - .at("foo2") - .file( - "Cargo.toml", - r#" - [project] - name = "foo" - version = "0.5.0" - authors = [] - - [dependencies] - a = "0.1.1" - "#, - ) - .file("src/main.rs", "fn main() {}") - .build(); - - // NOTE: we see UPDATING even when the changelog isn't used even though it is a no-op since - // update_index is called whenever a version is not in the index cache. - cargo(&p2, "build") - .with_stderr(format!( - "\ -[UPDATING] [..] -[PREFETCHING] index files ... -[DOWNLOADING] crates ... 
-[DOWNLOADED] a v0.1.1 (http registry `{reg}`) -[COMPILING] a v0.1.1 -[COMPILING] foo v0.5.0 ([CWD]) -[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s -", - reg = url - )) - .run(); - - if let Some(last_updated) = &last_updated { - // Check the contents of the last-updated file to see that it picked up the new epoch. - assert_eq!( - fs::read_to_string(last_updated).unwrap(), - format!("2.{}", "1 YYYY-MM-DD HH:MM:SS a\n".len()), - ); - } - - // Next, publish a new version and make the changelog empty (which is also a rollover) - Package::new("a", "0.1.2").publish(); - assert!(registry_path().join("changelog").exists(),); - fs::write(registry_path().join("changelog"), b"").unwrap(); - - // And again, build a project that depends on the new version. - // It should realize it's not in cache, and update the registry, - // which should again detect the rollover, invalidate the cache, - // and then succeed in fetching 0.1.2. - let p3 = project() - .at("foo3") - .file( - "Cargo.toml", - r#" - [project] - name = "foo" - version = "0.5.0" - authors = [] - - [dependencies] - a = "0.1.2" - "#, - ) - .file("src/main.rs", "fn main() {}") - .build(); - - // NOTE: again, we see UPDATING even when the changelog isn't used even though it is a no-op - // since update_index is called whenever a version is not in the index cache. - cargo(&p3, "build") - .with_stderr(format!( - "\ -[UPDATING] [..] -[PREFETCHING] index files ... -[DOWNLOADING] crates ... -[DOWNLOADED] a v0.1.2 (http registry `{reg}`) -[COMPILING] a v0.1.2 -[COMPILING] foo v0.5.0 ([CWD]) -[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s -", - reg = url - )) - .run(); - - if let Some(last_updated) = &last_updated { - // Check the contents of the last-updated file to see that it picked up the new epoch. 
- assert_eq!(fs::read_to_string(last_updated).unwrap(), "unsupported"); - } -} - -test_w_wo_changelog!(update_publish_then_update); -fn update_publish_then_update(config: RegistryServerConfiguration) { - let server = setup(config); +#[cargo_test] +fn update_publish_then_update() { + let server = setup(); let url = format!("http://{}/", server.addr()); // First generate a Cargo.lock and a clone of the registry index at the @@ -470,9 +300,9 @@ fn update_publish_then_update(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(update_multiple_packages); -fn update_multiple_packages(config: RegistryServerConfiguration) { - let server = setup(config); +#[cargo_test] +fn update_multiple_packages() { + let server = setup(); let url = format!("http://{}/", server.addr()); let p = project() .file( @@ -534,9 +364,9 @@ fn update_multiple_packages(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(bundled_crate_in_registry); -fn bundled_crate_in_registry(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn bundled_crate_in_registry() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -577,9 +407,9 @@ fn bundled_crate_in_registry(config: RegistryServerConfiguration) { cargo(&p, "run").run(); } -test_w_wo_changelog!(update_same_prefix_oh_my_how_was_this_a_bug); -fn update_same_prefix_oh_my_how_was_this_a_bug(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn update_same_prefix_oh_my_how_was_this_a_bug() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -605,9 +435,9 @@ fn update_same_prefix_oh_my_how_was_this_a_bug(config: RegistryServerConfigurati cargo(&p, "update -pfoobar --precise=0.2.0").run(); } -test_w_wo_changelog!(use_semver); -fn use_semver(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn use_semver() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -629,9 +459,9 
@@ fn use_semver(config: RegistryServerConfiguration) { cargo(&p, "build").run(); } -test_w_wo_changelog!(use_semver_package_incorrectly); -fn use_semver_package_incorrectly(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn use_semver_package_incorrectly() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -679,9 +509,9 @@ required by package `b v0.1.0 ([..])` .run(); } -test_w_wo_changelog!(only_download_relevant); -fn only_download_relevant(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn only_download_relevant() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -721,9 +551,9 @@ fn only_download_relevant(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(resolve_and_backtracking); -fn resolve_and_backtracking(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn resolve_and_backtracking() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -748,9 +578,9 @@ fn resolve_and_backtracking(config: RegistryServerConfiguration) { cargo(&p, "build").run(); } -test_w_wo_changelog!(disallow_network); -fn disallow_network(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn disallow_network() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -790,9 +620,9 @@ Caused by: .run(); } -test_w_wo_changelog!(add_dep_dont_update_registry); -fn add_dep_dont_update_registry(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn add_dep_dont_update_registry() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -851,9 +681,9 @@ fn add_dep_dont_update_registry(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(bump_version_dont_update_registry); -fn bump_version_dont_update_registry(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn 
bump_version_dont_update_registry() { + let _server = setup(); let p = project() .file( "Cargo.toml", @@ -910,9 +740,9 @@ fn bump_version_dont_update_registry(config: RegistryServerConfiguration) { .run(); } -test_w_wo_changelog!(toml_lies_but_index_is_truth); -fn toml_lies_but_index_is_truth(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn toml_lies_but_index_is_truth() { + let _server = setup(); Package::new("foo", "0.2.0").publish(); Package::new("bar", "0.3.0") .dep("foo", "0.2.0") @@ -950,9 +780,9 @@ fn toml_lies_but_index_is_truth(config: RegistryServerConfiguration) { cargo(&p, "build -v").run(); } -test_w_wo_changelog!(rename_deps_and_features); -fn rename_deps_and_features(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn rename_deps_and_features() { + let _server = setup(); Package::new("foo", "0.1.0") .file("src/lib.rs", "pub fn f1() {}") .publish(); @@ -1010,9 +840,9 @@ fn rename_deps_and_features(config: RegistryServerConfiguration) { cargo(&p, "build --features bar/another").run(); } -test_w_wo_changelog!(ignore_invalid_json_lines); -fn ignore_invalid_json_lines(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn ignore_invalid_json_lines() { + let _server = setup(); Package::new("foo", "0.1.0").publish(); Package::new("foo", "0.1.1").invalid_json(true).publish(); Package::new("foo", "0.2.0").publish(); @@ -1037,9 +867,9 @@ fn ignore_invalid_json_lines(config: RegistryServerConfiguration) { cargo(&p, "build").run(); } -test_w_wo_changelog!(readonly_registry_still_works); -fn readonly_registry_still_works(config: RegistryServerConfiguration) { - let _server = setup(config); +#[cargo_test] +fn readonly_registry_still_works() { + let _server = setup(); Package::new("foo", "0.1.0").publish(); let p = project() From 7d1fef8d7827d3f2f83f1096ba6c6d6108ef7c69 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 16:29:17 -0800 
Subject: [PATCH 65/83] nits --- src/cargo/sources/registry/http_remote.rs | 4 ++-- src/cargo/sources/registry/mod.rs | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 652ad4fa635..92727e4e77b 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -134,7 +134,7 @@ struct Download { name: InternedString, /// The version requirements for the dependency line that triggered this fetch. - // NOTE: with https://github.com/steveklabnik/semver/issues/170 the HashSet is unnecessary + // NOTE: we can get rid of the HashSet (and other complexity) if we had VersionReq::union reqs: HashSet, /// True if this download is of a direct dependency of the root crate. @@ -426,7 +426,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { self.prepare()?; let mut handle = ops::http_handle(self.config)?; - debug!("fetch {}{}", url, path.display()); + debug!("prefetch {}{}", url, path.display()); handle.get(true)?; handle.url(&format!("{}{}", url, path.display()))?; handle.follow_location(true)?; diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 88a16d3c946..3fd3d0d7fc1 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -645,7 +645,12 @@ impl<'cfg> Source for RegistrySource<'cfg> { &mut self, deps: &mut dyn ExactSizeIterator>, ) -> CargoResult<()> { - // TODO: conditional index update? + // In query, if a dependency is locked, we see if we can get away with querying it without + // doing an index update. Only if that fails do we update the index and then try again. + // Since we're in the prefetching stage here, we never want to update the index regardless + // of whether any given dependency is locked or not. Instead, we just prefetch all the + // current dependencies regardless of whether they're locked or not. 
If an index update is + // needed later, we'll deal with it at that time. self.index .prefetch(deps, &self.yanked_whitelist, &mut *self.ops)?; Ok(()) From cc87623a23542522f207fc8332de319448c21002 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Thu, 3 Dec 2020 16:29:41 -0800 Subject: [PATCH 66/83] Avoid index updates if we can update just one file --- src/cargo/sources/registry/http_remote.rs | 8 +++++ src/cargo/sources/registry/index.rs | 18 ++++++++++++ src/cargo/sources/registry/mod.rs | 36 +++++++++++++++++++++-- tests/testsuite/http_registry.rs | 3 +- 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 92727e4e77b..9a1c7c278e7 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -769,6 +769,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { None } + fn update_index_file(&mut self, root: &Path, path: &Path) -> CargoResult { + let pkg = root.join(path); + if pkg.exists() { + paths::remove_file(&pkg)?; + } + Ok(true) + } + fn load( &mut self, root: &Path, diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index c9d8725a850..64ac23351a6 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -371,6 +371,24 @@ impl<'cfg> RegistryIndex<'cfg> { Ok(self.summaries_cache.get_mut(&name).unwrap()) } + pub fn update_index_file( + &mut self, + pkg: InternedString, + load: &mut dyn RegistryData, + ) -> CargoResult { + let path = load.index_path(); + let root = load.assert_index_locked(path).to_path_buf(); + let mut path = make_dep_prefix(&pkg); + path.push('/'); + path.push_str(&pkg); + if load.update_index_file(&root, &Path::new(&path))? 
{ + self.summaries_cache.remove(&pkg); + Ok(true) + } else { + Ok(false) + } + } + pub fn query_inner( &mut self, dep: &Dependency, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 3fd3d0d7fc1..858c6dd901b 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -438,6 +438,10 @@ pub trait RegistryData { Ok(None) } + fn update_index_file(&mut self, _root: &Path, _path: &Path) -> CargoResult { + Ok(false) + } + fn load( &mut self, root: &Path, @@ -603,6 +607,11 @@ impl<'cfg> RegistrySource<'cfg> { } fn do_update(&mut self) -> CargoResult<()> { + // NOTE: It is really bad if this method is called after prefetching has completed. + // It will cause every subsequent `load` to double-check with the server again + // _synchronously_. If this is ever called, we should arguably re-run prefetching, or the + // following build will be quite slow. Consider using update_index_file instead. + self.ops.update_index()?; let path = self.ops.index_path(); self.index = index::RegistryIndex::new(self.source_id, path, self.config); @@ -674,8 +683,25 @@ impl<'cfg> Source for RegistrySource<'cfg> { if called { return Ok(()); } else { - debug!("falling back to an update"); - self.do_update()?; + // We failed to query the dependency based on the currently available index files. + // This probably means that our index file for `dep` is outdated, and does not + // contain the requested version. + // + // If the registry we are using supports per-file index updates, we tell it to + // update just the given index file and then try the query again. Otherwise, we + // fall back to a full index update. + if self + .index + .update_index_file(dep.package_name(), &mut *self.ops)? 
+ { + debug!( + "selectively refreshed index file for {}", + dep.package_name() + ); + } else { + debug!("falling back to an update"); + self.do_update()?; + } } } @@ -750,7 +776,11 @@ impl<'cfg> Source for RegistrySource<'cfg> { fn is_yanked(&mut self, pkg: PackageId) -> CargoResult { if !self.updated { - self.do_update()?; + // Try selectively updating just the index file for this package if possible. + if !self.index.update_index_file(pkg.name(), &mut *self.ops)? { + // It's not, so update the whole index. + self.do_update()?; + } } self.index.is_yanked(pkg, &mut *self.ops) } diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 7ea5a2f8b59..191ce25382c 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -282,12 +282,11 @@ fn update_publish_then_update() { )); // Finally, build the first project again (with our newer Cargo.lock) which - // should force an update of the old registry, download the new crate, and + // should download the new index file from the registry, download the new crate, and // then build everything again. cargo(&p, "build") .with_stderr(format!( "\ -[UPDATING] [..] [PREFETCHING] index files ... [DOWNLOADING] crates ... [DOWNLOADED] a v0.1.1 (http registry `{reg}`) From c56c4d23d2b2146d7b4acfc3527e34388bf7ba35 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 4 Dec 2020 09:54:45 -0800 Subject: [PATCH 67/83] Avoid hitting assertion failures --- src/cargo/sources/registry/http_remote.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 9a1c7c278e7..34fe9dd414d 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -774,6 +774,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { if pkg.exists() { paths::remove_file(&pkg)?; } + // Also reset self.fresh so we don't hit an assertion failure if we re-download. 
+ self.fresh.remove(path); Ok(true) } From 7efa4da4cd23183c7019409ae7c9bcdf3a824b32 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Fri, 4 Dec 2020 14:19:22 -0800 Subject: [PATCH 68/83] All index paths should be lowercase --- src/cargo/sources/registry/index.rs | 36 +++++++---------------------- src/cargo/sources/registry/mod.rs | 29 ++++++++++++++++++++++- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 64ac23351a6..35b8cde17ae 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -68,7 +68,7 @@ use crate::core::dependency::Dependency; use crate::core::{PackageId, SourceId, Summary}; -use crate::sources::registry::{make_dep_prefix, RegistryData, RegistryPackage}; +use crate::sources::registry::{make_dep_index_path, RegistryData, RegistryPackage}; use crate::util::interning::InternedString; use crate::util::paths; use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver}; @@ -333,16 +333,8 @@ impl<'cfg> RegistryIndex<'cfg> { // See module comment in `registry/mod.rs` for why this is structured // the way it is. - let fs_name = name - .chars() - .flat_map(|c| c.to_lowercase()) - .collect::(); - let raw_path = match fs_name.len() { - 1 => format!("1/{}", fs_name), - 2 => format!("2/{}", fs_name), - 3 => format!("3/{}/{}", &fs_name[..1], fs_name), - _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name), - }; + let raw_path = make_dep_index_path(&name); + let raw_path = raw_path.to_string_lossy(); // Attempt to handle misspellings by searching for a chain of related // names to the original `raw_path` name. 
Only return summaries @@ -378,10 +370,8 @@ impl<'cfg> RegistryIndex<'cfg> { ) -> CargoResult { let path = load.index_path(); let root = load.assert_index_locked(path).to_path_buf(); - let mut path = make_dep_prefix(&pkg); - path.push('/'); - path.push_str(&pkg); - if load.update_index_file(&root, &Path::new(&path))? { + let path = make_dep_index_path(&pkg); + if load.update_index_file(&root, &path)? { self.summaries_cache.remove(&pkg); Ok(true) } else { @@ -507,13 +497,6 @@ impl<'cfg> RegistryIndex<'cfg> { log::debug!("prefetching transitive dependencies"); - let relative = |name: &str| { - let mut prefix = make_dep_prefix(name); - prefix.push('/'); - prefix.push_str(name); - prefix - }; - // Since we allow dependency cycles in crates, we may end up walking in circles forever if // we just iteratively handled each candidate as we discovered it. The real resolver is // smart about how it avoids walking endlessly in cycles, but in this simple greedy @@ -535,8 +518,7 @@ impl<'cfg> RegistryIndex<'cfg> { if pkg.source_id() == self.source_id { let name = pkg.name(); log::trace!("prefetching from lockfile: {}", name); - let relative = relative(&*name); - load.prefetch(root, &Path::new(&relative), name, None, true)?; + load.prefetch(root, &make_dep_index_path(&*name), name, None, true)?; } } @@ -553,13 +535,12 @@ impl<'cfg> RegistryIndex<'cfg> { dep.package_name() ); - let relative = relative(&*dep.package_name()); // NOTE: We do not use UncanonicalizedIter here or below because if the user gave a // misspelling, it's fine if we don't prefetch their misspelling. The resolver will be // a bit slower, but then give them an error. 
load.prefetch( root, - &Path::new(&relative), + &make_dep_index_path(&*dep.package_name()), dep.package_name(), Some(dep.version_req()), false, @@ -645,10 +626,9 @@ impl<'cfg> RegistryIndex<'cfg> { } log::trace!("prefetching transitive dependency {}", dep.package_name()); - let relative = relative(&*dep.package_name()); load.prefetch( root, - Path::new(&relative), + &make_dep_index_path(&*dep.package_name()), dep.package_name(), Some(dep.version_req()), true, diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 858c6dd901b..480957c0d19 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -786,6 +786,20 @@ impl<'cfg> Source for RegistrySource<'cfg> { } } +fn make_dep_index_path(name: &str) -> PathBuf { + let fs_name = name + .chars() + .flat_map(|c| c.to_lowercase()) + .collect::(); + let raw_path = match fs_name.len() { + 1 => format!("1/{}", fs_name), + 2 => format!("2/{}", fs_name), + 3 => format!("3/{}/{}", &fs_name[..1], fs_name), + _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name), + }; + PathBuf::from(raw_path) +} + fn make_dep_prefix(name: &str) -> String { match name.len() { 1 => String::from("1"), @@ -797,7 +811,20 @@ fn make_dep_prefix(name: &str) -> String { #[cfg(test)] mod tests { - use super::make_dep_prefix; + use super::{make_dep_index_path, make_dep_prefix}; + + #[test] + fn dep_path() { + use std::path::Path; + assert_eq!(make_dep_index_path("a"), Path::new("1/a")); + assert_eq!(make_dep_index_path("A"), Path::new("1/a")); + assert_eq!(make_dep_index_path("ab"), Path::new("2/ab")); + assert_eq!(make_dep_index_path("Ab"), Path::new("2/ab")); + assert_eq!(make_dep_index_path("abc"), Path::new("3/a/abc")); + assert_eq!(make_dep_index_path("Abc"), Path::new("3/a/abc")); + assert_eq!(make_dep_index_path("AbCd"), Path::new("ab/cd/abcd")); + assert_eq!(make_dep_index_path("aBcDe"), Path::new("ab/cd/abcde")); + } #[test] fn dep_prefix() { From 
3c5c89b21b806e066773cedf988ca8040a469c3a Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:01:20 -0800 Subject: [PATCH 69/83] Default to empty prefetch --- crates/resolver-tests/src/lib.rs | 8 -------- src/cargo/core/registry.rs | 7 ++++++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/resolver-tests/src/lib.rs b/crates/resolver-tests/src/lib.rs index 239507fdc32..ac2a6cb4383 100644 --- a/crates/resolver-tests/src/lib.rs +++ b/crates/resolver-tests/src/lib.rs @@ -126,14 +126,6 @@ pub fn resolve_with_config_raw( used: HashSet, }; impl<'a> Registry for MyRegistry<'a> { - fn prefetch( - &mut self, - _deps: &mut dyn Iterator>, - ) -> CargoResult<()> { - // Doing nothing is a valid way to prefetch. - Ok(()) - } - fn query( &mut self, dep: &Dependency, diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index 61b63daedb9..a362df8117e 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -17,7 +17,12 @@ use url::Url; /// See also `core::Source`. pub trait Registry { /// Give source the opportunity to batch pre-fetch dependency information. - fn prefetch(&mut self, deps: &mut dyn Iterator>) -> CargoResult<()>; + fn prefetch( + &mut self, + _deps: &mut dyn Iterator>, + ) -> CargoResult<()> { + Ok(()) + } /// Attempt to find the packages that match a dependency request. 
fn query( From a5e8b9f1eb5619744104c4386747abffb7c483e0 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:02:04 -0800 Subject: [PATCH 70/83] Avoid extra indentation --- src/cargo/sources/registry/http_remote.rs | 64 ++++++++++++----------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 34fe9dd414d..fe7b353919f 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -230,38 +230,40 @@ const LAST_UPDATED_FILE: &str = ".last-updated"; impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { - if !self.config.offline() { - let mut http = if let Ok(h) = self.http.try_borrow_mut() { - h - } else { - anyhow::bail!("concurrent index downloads are not yet supported"); - }; + if self.config.offline() { + return Ok(()); + } - if http.is_none() { - // NOTE: lifted from src/cargo/core/package.rs - // - // Ensure that we'll actually be able to acquire an HTTP handle later on - // once we start trying to download crates. This will weed out any - // problems with `.cargo/config` configuration related to HTTP. - // - // This way if there's a problem the error gets printed before we even - // hit the index, which may not actually read this configuration. - let mut handle = ops::http_handle(&self.config)?; - handle.get(true)?; - handle.follow_location(true)?; - - // NOTE: lifted from src/cargo/core/package.rs - // - // This is an option to `libcurl` which indicates that if there's a - // bunch of parallel requests to the same host they all wait until the - // pipelining status of the host is known. This means that we won't - // initiate dozens of connections to crates.io, but rather only one. - // Once the main one is opened we realized that pipelining is possible - // and multiplexing is possible with static.crates.io. 
All in all this - // reduces the number of connections done to a more manageable state. - try_old_curl!(handle.pipewait(true), "pipewait"); - *http = Some(handle); - } + let mut http = if let Ok(h) = self.http.try_borrow_mut() { + h + } else { + anyhow::bail!("concurrent index downloads are not yet supported"); + }; + + if http.is_none() { + // NOTE: lifted from src/cargo/core/package.rs + // + // Ensure that we'll actually be able to acquire an HTTP handle later on + // once we start trying to download crates. This will weed out any + // problems with `.cargo/config` configuration related to HTTP. + // + // This way if there's a problem the error gets printed before we even + // hit the index, which may not actually read this configuration. + let mut handle = ops::http_handle(&self.config)?; + handle.get(true)?; + handle.follow_location(true)?; + + // NOTE: lifted from src/cargo/core/package.rs + // + // This is an option to `libcurl` which indicates that if there's a + // bunch of parallel requests to the same host they all wait until the + // pipelining status of the host is known. This means that we won't + // initiate dozens of connections to crates.io, but rather only one. + // Once the main one is opened we realized that pipelining is possible + // and multiplexing is possible with static.crates.io. All in all this + // reduces the number of connections done to a more manageable state. 
+ try_old_curl!(handle.pipewait(true), "pipewait"); + *http = Some(handle); } Ok(()) From a1383e06da99288023128f82cad7a342d3de3920 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:03:24 -0800 Subject: [PATCH 71/83] Only be conditional on fields the server provided --- src/cargo/sources/registry/http_remote.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index fe7b353919f..9f9fde34b01 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -452,8 +452,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Make sure we don't send data back if it's the same as we have in the index. if let Some((ref etag, ref last_modified, _)) = was { let mut list = List::new(); - list.append(&format!("If-None-Match: {}", etag))?; - list.append(&format!("If-Modified-Since: {}", last_modified))?; + if !etag.is_empty() { + list.append(&format!("If-None-Match: {}", etag))?; + } + if !last_modified.is_empty() { + list.append(&format!("If-Modified-Since: {}", last_modified))?; + } handle.http_headers(list)?; } From 5750fdf9a530680c9f1848e5168fca1d20af2480 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:04:17 -0800 Subject: [PATCH 72/83] Fix cut-off comment --- src/cargo/sources/registry/http_remote.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 9f9fde34b01..b25c7ee066a 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -632,7 +632,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { // Walk all the requests that completed and handle their responses. 
// - // This will ultimately add more replies to self.downloads.eager, which we'll + // This will ultimately add more replies to self.downloads.eager, which we'll yield as + // we continue around the outer loop. while let Some((token, result)) = self.downloads.results.pop() { trace!("{} finished with {:?}", token, result); From e89e37cbb327f0997520928b56deaa9e008179aa Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:10:19 -0800 Subject: [PATCH 73/83] Remove index files we get ~404 for --- src/cargo/sources/registry/http_remote.rs | 28 ++++++++++++++--------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index b25c7ee066a..538e6598d67 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -712,17 +712,23 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { "download finished for already-finished path" ); } - 403 | 404 => { - // Not Found response. - // We treat Forbidden as just being another expression for 404 - // from a server that does not want to reveal file names. - // The crate doesn't exist, so we simply do not yield it. - // Errors will eventually be yielded by load(). - } - 410 | 451 => { - // The crate was deleted from the registry. - // Errors will eventually be yielded by load(). - todo!("we should delete the local index file here if it exists"); + 403 | 404 | 410 | 451 => { + // Variants of a Not Found response. + // + // We treat Forbidden as just being another expression for 404 from a + // server that does not want to reveal file names. + // + // We treat Gone and Unavailable for Legal Reasons as equivalent to 404, + // since they still mean that the crate isn't there. + // + // Since the crate doesn't exist, we simply do not yield it. We also remove + // the index file if it exists. Errors will eventually be yielded by + // load(). 
+ let path = self.config.assert_package_cache_locked(&self.index_path); + let pkg = path.join(&fetched.path); + if pkg.exists() { + paths::remove_file(pkg)?; + } } code => { anyhow::bail!( From ad63b9db0cf239ddf38fd2fd5d4c2a0915379a8f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:10:34 -0800 Subject: [PATCH 74/83] Be more helpful about HTTP status code errors --- src/cargo/sources/registry/http_remote.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 538e6598d67..22209433c7d 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -732,10 +732,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { } code => { anyhow::bail!( - "prefetch: server returned unexpected HTTP status code {} for {}{}", + "prefetch: server returned unexpected HTTP status code {} for {}{}: {}", code, self.source_id.url(), - fetched.path.display() + fetched.path.display(), + String::from_utf8_lossy(&data) + .lines() + .next() + .expect("there is always a first line"), ); } } From 8b8e3dfeafd9221b5a4e27eaa1997e1e21ed60ef Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:11:04 -0800 Subject: [PATCH 75/83] Spelling is hard --- src/cargo/sources/registry/http_remote.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 22209433c7d..7d851fb2c15 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -803,7 +803,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()> { // NOTE: This is pretty much a synchronous version of the prefetch() + next_prefetched() - // dance. Much of the code is sort-of duplicated, which isn't great, but it's moderalyte + // dance. 
Much of the code is sort-of duplicated, which isn't great, but it's moderately // straightforward and works. When the real resolver supports a load returning "not yet", // load and prefetch can be merged. From 917f5d0de1e4f34698eeca74a54620fa4d717dc7 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:13:56 -0800 Subject: [PATCH 76/83] Assert we have valid utf-8 paths --- src/cargo/sources/registry/index.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 35b8cde17ae..ef36972218e 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -334,7 +334,9 @@ impl<'cfg> RegistryIndex<'cfg> { // See module comment in `registry/mod.rs` for why this is structured // the way it is. let raw_path = make_dep_index_path(&name); - let raw_path = raw_path.to_string_lossy(); + let raw_path = raw_path + .to_str() + .expect("path was generated from utf-8 name"); // Attempt to handle misspellings by searching for a chain of related // names to the original `raw_path` name. 
Only return summaries

From cd5281d857b896714e06c4fc237267804f04df19 Mon Sep 17 00:00:00 2001
From: Jon Gjengset <jon@thesquareplanet.com>
Date: Wed, 9 Dec 2020 15:37:53 -0800
Subject: [PATCH 77/83] Avoid duplicating crate download code

---
 src/cargo/sources/registry/download.rs    | 108 ++++++++++++++++++++++
 src/cargo/sources/registry/http_remote.rs |  93 ++-----------------
 src/cargo/sources/registry/mod.rs         |   1 +
 src/cargo/sources/registry/remote.rs      |  92 ++----------------
 4 files changed, 127 insertions(+), 167 deletions(-)
 create mode 100644 src/cargo/sources/registry/download.rs

diff --git a/src/cargo/sources/registry/download.rs b/src/cargo/sources/registry/download.rs
new file mode 100644
index 00000000000..8f375f5f384
--- /dev/null
+++ b/src/cargo/sources/registry/download.rs
@@ -0,0 +1,108 @@
+use crate::core::PackageId;
+use crate::sources::registry::make_dep_prefix;
+use crate::sources::registry::MaybeLock;
+use crate::sources::registry::{
+    RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, VERSION_TEMPLATE,
+};
+use crate::util::errors::{CargoResult, CargoResultExt};
+use crate::util::{Config, Filesystem, Sha256};
+use std::fmt::Write as FmtWrite;
+use std::fs::{self, File, OpenOptions};
+use std::io::prelude::*;
+use std::io::SeekFrom;
+use std::path::Path;
+use std::str;
+
+pub(super) fn filename(pkg: PackageId) -> String {
+    format!("{}-{}.crate", pkg.name(), pkg.version())
+}
+
+pub(super) fn download(
+    load: &mut dyn RegistryData,
+    path: &Path,
+    pkg: PackageId,
+    _checksum: &str,
+) -> CargoResult<MaybeLock> {
+    // Attempt to open an read-only copy first to avoid an exclusive write
+    // lock and also work with read-only filesystems. Note that we check the
+    // length of the file like below to handle interrupted downloads.
+    //
+    // If this fails then we fall through to the exclusive path where we may
+    // have to redownload the file.
+    if let Ok(dst) = File::open(path) {
+        let meta = dst.metadata()?;
+        if meta.len() > 0 {
+            return Ok(MaybeLock::Ready(dst));
+        }
+    }
+
+    let config = load.config()?.unwrap();
+    let mut url = config.dl;
+    if !url.contains(CRATE_TEMPLATE)
+        && !url.contains(VERSION_TEMPLATE)
+        && !url.contains(PREFIX_TEMPLATE)
+        && !url.contains(LOWER_PREFIX_TEMPLATE)
+    {
+        write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap();
+    }
+    let prefix = make_dep_prefix(&*pkg.name());
+    let url = url
+        .replace(CRATE_TEMPLATE, &*pkg.name())
+        .replace(VERSION_TEMPLATE, &pkg.version().to_string())
+        .replace(PREFIX_TEMPLATE, &prefix)
+        .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase());
+
+    Ok(MaybeLock::Download {
+        url,
+        descriptor: pkg.to_string(),
+    })
+}
+
+pub(super) fn finish_download(
+    cache_path: &Filesystem,
+    config: &Config,
+    pkg: PackageId,
+    checksum: &str,
+    data: &[u8],
+) -> CargoResult<File> {
+    // Verify what we just downloaded
+    let actual = Sha256::new().update(data).finish_hex();
+    if actual != checksum {
+        anyhow::bail!("failed to verify the checksum of `{}`", pkg)
+    }
+
+    let filename = filename(pkg);
+    cache_path.create_dir()?;
+    let path = cache_path.join(&filename);
+    let path = config.assert_package_cache_locked(&path);
+    let mut dst = OpenOptions::new()
+        .create(true)
+        .read(true)
+        .write(true)
+        .open(&path)
+        .chain_err(|| format!("failed to open `{}`", path.display()))?;
+    let meta = dst.metadata()?;
+    if meta.len() > 0 {
+        return Ok(dst);
+    }
+
+    dst.write_all(data)?;
+    dst.seek(SeekFrom::Start(0))?;
+    Ok(dst)
+}
+
+pub(super) fn is_crate_downloaded(
+    cache_path: &Filesystem,
+    config: &Config,
+    pkg: PackageId,
+) -> bool {
+    let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
+    let path = Path::new(&filename);
+
+    let path = cache_path.join(path);
+    let path = config.assert_package_cache_locked(&path);
+    if let Ok(meta) = fs::metadata(path) {
+        return meta.len() > 0;
+    }
+    false
+}
diff --git
a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 7d851fb2c15..38faf60a26b 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -4,26 +4,21 @@ use crate::core::{PackageId, SourceId}; use crate::ops; -use crate::sources::registry::make_dep_prefix; +use crate::sources::registry::download; use crate::sources::registry::MaybeLock; -use crate::sources::registry::{ - Fetched, RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, - VERSION_TEMPLATE, -}; +use crate::sources::registry::{Fetched, RegistryConfig, RegistryData}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; -use crate::util::{self, Config, Filesystem, Progress, ProgressStyle, Sha256}; +use crate::util::{self, Config, Filesystem, Progress, ProgressStyle}; use bytesize::ByteSize; use curl::easy::{Easy, HttpVersion, List}; use curl::multi::{EasyHandle, Multi}; use log::{debug, trace}; use std::cell::{Cell, RefCell, RefMut}; use std::collections::{BTreeMap, HashMap, HashSet}; -use std::fmt::Write as FmtWrite; -use std::fs::{self, File, OpenOptions}; +use std::fs::File; use std::io::prelude::*; -use std::io::SeekFrom; use std::path::{Path, PathBuf}; use std::str; use std::time::Duration; @@ -185,10 +180,6 @@ impl<'cfg> HttpRegistry<'cfg> { } } - fn filename(&self, pkg: PackageId) -> String { - format!("{}-{}.crate", pkg.name(), pkg.version()) - } - fn http(&self) -> CargoResult> { let handle = if let Ok(h) = self.http.try_borrow_mut() { h @@ -1034,46 +1025,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { Ok(()) } - // NOTE: What follows is identical to remote.rs - - fn download(&mut self, pkg: PackageId, _checksum: &str) -> CargoResult { - let filename = self.filename(pkg); - - // Attempt to open an read-only copy first to avoid an exclusive write - // lock and also work with read-only filesystems. 
Note that we check the - // length of the file like below to handle interrupted downloads. - // - // If this fails then we fall through to the exclusive path where we may - // have to redownload the file. + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { + let filename = download::filename(pkg); let path = self.cache_path.join(&filename); let path = self.config.assert_package_cache_locked(&path); - if let Ok(dst) = File::open(&path) { - let meta = dst.metadata()?; - if meta.len() > 0 { - return Ok(MaybeLock::Ready(dst)); - } - } - - let config = self.config()?.unwrap(); - let mut url = config.dl; - if !url.contains(CRATE_TEMPLATE) - && !url.contains(VERSION_TEMPLATE) - && !url.contains(PREFIX_TEMPLATE) - && !url.contains(LOWER_PREFIX_TEMPLATE) - { - write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap(); - } - let prefix = make_dep_prefix(&*pkg.name()); - let url = url - .replace(CRATE_TEMPLATE, &*pkg.name()) - .replace(VERSION_TEMPLATE, &pkg.version().to_string()) - .replace(PREFIX_TEMPLATE, &prefix) - .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase()); - - Ok(MaybeLock::Download { - url, - descriptor: pkg.to_string(), - }) + download::download(self, &path, pkg, checksum) } fn finish_download( @@ -1082,42 +1038,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { checksum: &str, data: &[u8], ) -> CargoResult { - // Verify what we just downloaded - let actual = Sha256::new().update(data).finish_hex(); - if actual != checksum { - anyhow::bail!("failed to verify the checksum of `{}`", pkg) - } - - let filename = self.filename(pkg); - self.cache_path.create_dir()?; - let path = self.cache_path.join(&filename); - let path = self.config.assert_package_cache_locked(&path); - let mut dst = OpenOptions::new() - .create(true) - .read(true) - .write(true) - .open(&path) - .chain_err(|| format!("failed to open `{}`", path.display()))?; - let meta = dst.metadata()?; - if meta.len() > 0 { - return Ok(dst); - } - - 
dst.write_all(data)?; - dst.seek(SeekFrom::Start(0))?; - Ok(dst) + download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) } fn is_crate_downloaded(&self, pkg: PackageId) -> bool { - let filename = format!("{}-{}.crate", pkg.name(), pkg.version()); - let path = Path::new(&filename); - - let path = self.cache_path.join(path); - let path = self.config.assert_package_cache_locked(&path); - if let Ok(meta) = fs::metadata(path) { - return meta.len() > 0; - } - false + download::is_crate_downloaded(&self.cache_path, &self.config, pkg) } } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 480957c0d19..32670be9259 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -466,6 +466,7 @@ pub enum MaybeLock { Download { url: String, descriptor: String }, } +mod download; mod http_remote; mod index; mod local; diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index f51546d8a34..9fcd469d4e5 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,22 +1,16 @@ use crate::core::{GitReference, PackageId, SourceId}; use crate::sources::git; -use crate::sources::registry::make_dep_prefix; +use crate::sources::registry::download; use crate::sources::registry::MaybeLock; -use crate::sources::registry::{ - RegistryConfig, RegistryData, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, - VERSION_TEMPLATE, -}; +use crate::sources::registry::{RegistryConfig, RegistryData}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; -use crate::util::{Config, Filesystem, Sha256}; +use crate::util::{Config, Filesystem}; use lazycell::LazyCell; use log::{debug, trace}; use std::cell::{Cell, Ref, RefCell}; -use std::fmt::Write as FmtWrite; -use std::fs::{self, File, OpenOptions}; -use std::io::prelude::*; -use std::io::SeekFrom; +use std::fs::File; 
use std::mem; use std::path::Path; use std::str; @@ -127,10 +121,6 @@ impl<'cfg> RemoteRegistry<'cfg> { *self.tree.borrow_mut() = Some(tree); Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap())) } - - fn filename(&self, pkg: PackageId) -> String { - format!("{}-{}.crate", pkg.name(), pkg.version()) - } } const LAST_UPDATED_FILE: &str = ".last-updated"; @@ -239,44 +229,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { Ok(()) } - fn download(&mut self, pkg: PackageId, _checksum: &str) -> CargoResult { - let filename = self.filename(pkg); - - // Attempt to open an read-only copy first to avoid an exclusive write - // lock and also work with read-only filesystems. Note that we check the - // length of the file like below to handle interrupted downloads. - // - // If this fails then we fall through to the exclusive path where we may - // have to redownload the file. + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult { + let filename = download::filename(pkg); let path = self.cache_path.join(&filename); let path = self.config.assert_package_cache_locked(&path); - if let Ok(dst) = File::open(&path) { - let meta = dst.metadata()?; - if meta.len() > 0 { - return Ok(MaybeLock::Ready(dst)); - } - } - - let config = self.config()?.unwrap(); - let mut url = config.dl; - if !url.contains(CRATE_TEMPLATE) - && !url.contains(VERSION_TEMPLATE) - && !url.contains(PREFIX_TEMPLATE) - && !url.contains(LOWER_PREFIX_TEMPLATE) - { - write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap(); - } - let prefix = make_dep_prefix(&*pkg.name()); - let url = url - .replace(CRATE_TEMPLATE, &*pkg.name()) - .replace(VERSION_TEMPLATE, &pkg.version().to_string()) - .replace(PREFIX_TEMPLATE, &prefix) - .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase()); - - Ok(MaybeLock::Download { - url, - descriptor: pkg.to_string(), - }) + download::download(self, &path, pkg, checksum) } fn finish_download( @@ -285,42 +242,11 @@ impl<'cfg> RegistryData for 
RemoteRegistry<'cfg> { checksum: &str, data: &[u8], ) -> CargoResult { - // Verify what we just downloaded - let actual = Sha256::new().update(data).finish_hex(); - if actual != checksum { - anyhow::bail!("failed to verify the checksum of `{}`", pkg) - } - - let filename = self.filename(pkg); - self.cache_path.create_dir()?; - let path = self.cache_path.join(&filename); - let path = self.config.assert_package_cache_locked(&path); - let mut dst = OpenOptions::new() - .create(true) - .read(true) - .write(true) - .open(&path) - .chain_err(|| format!("failed to open `{}`", path.display()))?; - let meta = dst.metadata()?; - if meta.len() > 0 { - return Ok(dst); - } - - dst.write_all(data)?; - dst.seek(SeekFrom::Start(0))?; - Ok(dst) + download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) } fn is_crate_downloaded(&self, pkg: PackageId) -> bool { - let filename = format!("{}-{}.crate", pkg.name(), pkg.version()); - let path = Path::new(&filename); - - let path = self.cache_path.join(path); - let path = self.config.assert_package_cache_locked(&path); - if let Ok(meta) = fs::metadata(path) { - return meta.len() > 0; - } - false + download::is_crate_downloaded(&self.cache_path, &self.config, pkg) } } From 3382ddf03586359a4dce25957a58350cf6b5647c Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Wed, 9 Dec 2020 15:59:56 -0800 Subject: [PATCH 78/83] Avoid adding a new SourceId --- src/cargo/core/source/source_id.rs | 43 +++++------------------ src/cargo/ops/registry.rs | 8 +---- src/cargo/sources/config.rs | 18 +--------- src/cargo/sources/registry/http_remote.rs | 30 ++++++++++------ src/cargo/sources/registry/mod.rs | 6 +--- src/cargo/util/canonical_url.rs | 13 ++++++- tests/testsuite/http_registry.rs | 26 +++++++------- 7 files changed, 57 insertions(+), 87 deletions(-) diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index ef5f01d83a0..5331e51e454 100644 --- a/src/cargo/core/source/source_id.rs +++ 
b/src/cargo/core/source/source_id.rs @@ -54,10 +54,6 @@ enum SourceKind { LocalRegistry, /// A directory-based registry. Directory, - /// A remote registry accessed over HTTP. - /// - /// The protocol is specified by [this RFC](https://github.com/rust-lang/rfcs/pull/2789). - Http, } /// Information to find a specific commit in a Git repository. @@ -140,8 +136,9 @@ impl SourceId { .with_precise(Some("locked".to_string()))) } "sparse" => { - let url = url.into_url()?; - Ok(SourceId::new(SourceKind::Http, url)?.with_precise(Some("locked".to_string()))) + let url = string.into_url()?; + Ok(SourceId::new(SourceKind::Registry, url)? + .with_precise(Some("locked".to_string()))) } "path" => { let url = url.into_url()?; @@ -176,11 +173,6 @@ impl SourceId { SourceId::new(SourceKind::Registry, url.clone()) } - /// Creates a SourceId from a RFC HTTP URL. - pub fn for_http_registry(url: &Url) -> CargoResult { - SourceId::new(SourceKind::Http, url.clone()) - } - /// Creates a SourceId from a local registry path. pub fn for_local_registry(path: &Path) -> CargoResult { let url = path.into_url()?; @@ -207,11 +199,7 @@ impl SourceId { pub fn alt_registry(config: &Config, key: &str) -> CargoResult { let url = config.get_registry_index(key)?; - let (kind, url) = if let Some(url) = url.to_string().strip_prefix("sparse+") { - (SourceKind::Http, url.into_url()?) - } else { - (SourceKind::Registry, url) - }; + let (kind, url) = (SourceKind::Registry, url); Ok(SourceId::wrap(SourceIdInner { kind, canonical_url: CanonicalUrl::new(&url)?, @@ -259,7 +247,7 @@ impl SourceId { pub fn is_registry(self) -> bool { matches!( self.inner.kind, - SourceKind::Registry | SourceKind::Http | SourceKind::LocalRegistry + SourceKind::Registry | SourceKind::LocalRegistry ) } @@ -268,7 +256,7 @@ impl SourceId { /// "remote" may also mean a file URL to a git index, so it is not /// necessarily "remote". This just means it is not `local-registry`. 
pub fn is_remote_registry(self) -> bool { - matches!(self.inner.kind, SourceKind::Registry | SourceKind::Http) + matches!(self.inner.kind, SourceKind::Registry) } /// Returns `true` if this source from a Git repository. @@ -292,11 +280,9 @@ impl SourceId { }; Ok(Box::new(PathSource::new(&path, self, config))) } - SourceKind::Http => Ok(Box::new(RegistrySource::rfc_http( - self, - yanked_whitelist, - config, - ))), + SourceKind::Registry if self.url().scheme().starts_with("sparse+") => Ok(Box::new( + RegistrySource::rfc_http(self, yanked_whitelist, config), + )), SourceKind::Registry => Ok(Box::new(RegistrySource::remote( self, yanked_whitelist, @@ -413,10 +399,6 @@ impl Ord for SourceId { (SourceKind::Path, _) => return Ordering::Less, (_, SourceKind::Path) => return Ordering::Greater, - (SourceKind::Http, SourceKind::Http) => {} - (SourceKind::Http, _) => return Ordering::Less, - (_, SourceKind::Http) => return Ordering::Greater, - (SourceKind::Registry, SourceKind::Registry) => {} (SourceKind::Registry, _) => return Ordering::Less, (_, SourceKind::Registry) => return Ordering::Greater, @@ -517,7 +499,6 @@ impl fmt::Display for SourceId { Ok(()) } SourceKind::Path => write!(f, "{}", url_display(&self.inner.url)), - SourceKind::Http => write!(f, "http registry `{}`", url_display(&self.inner.url)), SourceKind::Registry => write!(f, "registry `{}`", url_display(&self.inner.url)), SourceKind::LocalRegistry => write!(f, "registry `{}`", url_display(&self.inner.url)), SourceKind::Directory => write!(f, "dir {}", url_display(&self.inner.url)), @@ -564,7 +545,6 @@ impl Hash for SourceId { SourceKind::Registry => 2usize.hash(into), SourceKind::LocalRegistry => 3usize.hash(into), SourceKind::Directory => 4usize.hash(into), - SourceKind::Http => 5usize.hash(into), } match self.inner.kind { SourceKind::Git(_) => self.inner.canonical_url.hash(into), @@ -601,11 +581,6 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> { } Ok(()) } - SourceIdInner { - kind: SourceKind::Http, - ref 
url, - .. - } => write!(f, "sparse+{}", url), SourceIdInner { kind: SourceKind::Registry, ref url, diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs index 0087890d09c..b4ff426827a 100644 --- a/src/cargo/ops/registry.rs +++ b/src/cargo/ops/registry.rs @@ -838,13 +838,7 @@ fn get_source_id( ) -> CargoResult { match (reg, index) { (Some(r), _) => SourceId::alt_registry(config, r), - (_, Some(i)) => { - if let Some(i) = i.strip_prefix("sparse+") { - SourceId::for_http_registry(&i.into_url()?) - } else { - SourceId::for_registry(&i.into_url()?) - } - } + (_, Some(i)) => SourceId::for_registry(&i.into_url()?), _ => { let map = SourceConfigMap::new(config)?; let src = map.load(SourceId::crates_io(config)?, &HashSet::new())?; diff --git a/src/cargo/sources/config.rs b/src/cargo/sources/config.rs index bc313122a47..71a9a7194c0 100644 --- a/src/cargo/sources/config.rs +++ b/src/cargo/sources/config.rs @@ -207,23 +207,7 @@ restore the source replacement configuration to continue the build let mut srcs = Vec::new(); if let Some(registry) = def.registry { let url = url(®istry, &format!("source.{}.registry", name))?; - - if url.scheme().starts_with("sparse+") { - if !self.config.cli_unstable().http_registry { - bail!("Usage of HTTP-based registries requires `-Z http-registry`") - } - - // NOTE: it is illegal to use set_scheme to change sparse+http(s) to http(s). 
- let url = url - .to_string() - .strip_prefix("sparse+") - .unwrap() - .into_url() - .unwrap(); - srcs.push(SourceId::for_http_registry(&url)?); - } else { - srcs.push(SourceId::for_registry(&url)?); - } + srcs.push(SourceId::for_registry(&url)?); } if let Some(local_registry) = def.local_registry { let path = local_registry.resolve_path(self.config); diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 38faf60a26b..e729bbaa53a 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -10,7 +10,7 @@ use crate::sources::registry::{Fetched, RegistryConfig, RegistryData}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; use crate::util::paths; -use crate::util::{self, Config, Filesystem, Progress, ProgressStyle}; +use crate::util::{self, Config, Filesystem, IntoUrl, Progress, ProgressStyle}; use bytesize::ByteSize; use curl::easy::{Easy, HttpVersion, List}; use curl::multi::{EasyHandle, Multi}; @@ -23,6 +23,7 @@ use std::path::{Path, PathBuf}; use std::str; use std::time::Duration; use std::time::Instant; +use url::Url; const ETAG: &'static [u8] = b"ETag"; const LAST_MODIFIED: &'static [u8] = b"Last-Modified"; @@ -57,6 +58,9 @@ pub struct HttpRegistry<'cfg> { source_id: SourceId, config: &'cfg Config, + /// Store the server URL without the protocol prefix (sparse+) + url: Url, + /// Cached HTTP handle for synchronous requests (RegistryData::load). 
    http: RefCell<Option<Easy>>,
@@ -149,11 +153,19 @@ struct Download {

 impl<'cfg> HttpRegistry<'cfg> {
     pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> HttpRegistry<'cfg> {
+        let url = source_id
+            .url()
+            .to_string()
+            .trim_start_matches("sparse+")
+            .into_url()
+            .expect("a url with the protocol stripped should still be valid");
+
         HttpRegistry {
             index_path: config.registry_index_path().join(name),
             cache_path: config.registry_cache_path().join(name),
             source_id,
             config,
+            url,
             http: RefCell::new(None),
             prefetch: Multi::new(),
             multiplexing: false,
@@ -415,13 +427,12 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
         }

         // Looks like we're going to have to bite the bullet and do a network request.
-        let url = self.source_id.url();
         self.prepare()?;
         let mut handle = ops::http_handle(self.config)?;
-        debug!("prefetch {}{}", url, path.display());
+        debug!("prefetch {}{}", self.url, path.display());
         handle.get(true)?;
-        handle.url(&format!("{}{}", url, path.display()))?;
+        handle.url(&format!("{}{}", self.url, path.display()))?;
         handle.follow_location(true)?;

         // Enable HTTP/2 if possible.
@@ -725,7 +736,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!( "prefetch: server returned unexpected HTTP status code {} for {}{}: {}", code, - self.source_id.url(), + self.url, fetched.path.display(), String::from_utf8_lossy(&data) .lines() @@ -857,18 +868,17 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { None }; - let url = self.source_id.url(); if self.config.offline() { anyhow::bail!( "can't download index file from '{}': you are in offline mode (--offline)", - url + self.url ); } self.prepare()?; let mut handle = self.http()?; - debug!("fetch {}{}", url, path.display()); - handle.url(&format!("{}{}", url, path.display()))?; + debug!("fetch {}{}", self.url, path.display()); + handle.url(&format!("{}{}", self.url, path.display()))?; if let Some((ref etag, ref last_modified, _)) = was { let mut list = List::new(); @@ -950,7 +960,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { anyhow::bail!( "load: server returned unexpected HTTP status code {} for {}{}", code, - self.source_id.url(), + self.url, path.display() ); } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 32670be9259..2b45174f9e4 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -322,11 +322,7 @@ impl<'a> RegistryDependency<'a> { } = self; let id = if let Some(registry) = ®istry { - if let Some(registry) = registry.strip_prefix("sparse+") { - SourceId::for_http_registry(®istry.into_url()?)? - } else { - SourceId::for_registry(®istry.into_url()?)? - } + SourceId::for_registry(®istry.into_url()?)? 
} else { default }; diff --git a/src/cargo/util/canonical_url.rs b/src/cargo/util/canonical_url.rs index c6f30527932..01b8df7e871 100644 --- a/src/cargo/util/canonical_url.rs +++ b/src/cargo/util/canonical_url.rs @@ -1,4 +1,4 @@ -use crate::util::errors::CargoResult; +use crate::util::{errors::CargoResult, IntoUrl}; use std::hash::{self, Hash}; use url::Url; @@ -56,6 +56,17 @@ impl CanonicalUrl { url.path_segments_mut().unwrap().pop().push(&last); } + // Ignore the protocol specifier (if any). + if url.scheme().starts_with("sparse+") { + // NOTE: it is illegal to use set_scheme to change sparse+http(s) to http(s). + url = url + .to_string() + .strip_prefix("sparse+") + .expect("we just found that prefix") + .into_url() + .expect("a valid url without a protocol specifier should still be valid"); + } + Ok(CanonicalUrl(url)) } diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 191ce25382c..92beeccfb1f 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -47,7 +47,7 @@ fn setup() -> RegistryServer { #[cargo_test] fn simple() { let server = setup(); - let url = format!("http://{}/", server.addr()); + let url = format!("sparse+http://{}", server.addr()); let p = project() .file( "Cargo.toml", @@ -72,7 +72,7 @@ fn simple() { [UPDATING] `{reg}` index [PREFETCHING] index files ... [DOWNLOADING] crates ... -[DOWNLOADED] bar v0.0.1 (http registry `{reg}`) +[DOWNLOADED] bar v0.0.1 (registry `{reg}`) [COMPILING] bar v0.0.1 [COMPILING] foo v0.0.1 ([CWD]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s @@ -99,7 +99,7 @@ fn simple() { #[cargo_test] fn deps() { let server = setup(); - let url = format!("http://{}/", server.addr()); + let url = format!("sparse+http://{}", server.addr()); let p = project() .file( "Cargo.toml", @@ -125,8 +125,8 @@ fn deps() { [UPDATING] `{reg}` index [PREFETCHING] index files ... [DOWNLOADING] crates ... -[DOWNLOADED] [..] 
v0.0.1 (http registry `{reg}`) -[DOWNLOADED] [..] v0.0.1 (http registry `{reg}`) +[DOWNLOADED] [..] v0.0.1 (registry `{reg}`) +[DOWNLOADED] [..] v0.0.1 (registry `{reg}`) [COMPILING] baz v0.0.1 [COMPILING] bar v0.0.1 [COMPILING] foo v0.0.1 ([CWD]) @@ -175,7 +175,7 @@ required by package `foo v0.0.1 ([..])` #[cargo_test] fn update_registry() { let server = setup(); - let url = format!("http://{}/", server.addr()); + let url = format!("sparse+http://{}", server.addr()); Package::new("init", "0.0.1").publish(); let p = project() @@ -213,7 +213,7 @@ required by package `foo v0.0.1 ([..])` [UPDATING] `{reg}` index [PREFETCHING] index files ... [DOWNLOADING] crates ... -[DOWNLOADED] notyet v0.0.1 (http registry `{reg}`) +[DOWNLOADED] notyet v0.0.1 (registry `{reg}`) [COMPILING] notyet v0.0.1 [COMPILING] foo v0.0.1 ([CWD]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s @@ -226,7 +226,7 @@ required by package `foo v0.0.1 ([..])` #[cargo_test] fn update_publish_then_update() { let server = setup(); - let url = format!("http://{}/", server.addr()); + let url = format!("sparse+http://{}", server.addr()); // First generate a Cargo.lock and a clone of the registry index at the // "head" of the current registry. @@ -289,7 +289,7 @@ fn update_publish_then_update() { "\ [PREFETCHING] index files ... [DOWNLOADING] crates ... 
-[DOWNLOADED] a v0.1.1 (http registry `{reg}`) +[DOWNLOADED] a v0.1.1 (registry `{reg}`) [COMPILING] a v0.1.1 [COMPILING] foo v0.5.0 ([CWD]) [FINISHED] dev [unoptimized + debuginfo] target(s) in [..]s @@ -302,7 +302,7 @@ fn update_publish_then_update() { #[cargo_test] fn update_multiple_packages() { let server = setup(); - let url = format!("http://{}/", server.addr()); + let url = format!("sparse+http://{}", server.addr()); let p = project() .file( "Cargo.toml", @@ -353,9 +353,9 @@ fn update_multiple_packages() { .run(); cargo(&p, "build") - .with_stderr_contains(format!("[DOWNLOADED] a v0.1.1 (http registry `{}`)", url)) - .with_stderr_contains(format!("[DOWNLOADED] b v0.1.1 (http registry `{}`)", url)) - .with_stderr_contains(format!("[DOWNLOADED] c v0.1.1 (http registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] a v0.1.1 (registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] b v0.1.1 (registry `{}`)", url)) + .with_stderr_contains(format!("[DOWNLOADED] c v0.1.1 (registry `{}`)", url)) .with_stderr_contains("[COMPILING] a v0.1.1") .with_stderr_contains("[COMPILING] b v0.1.1") .with_stderr_contains("[COMPILING] c v0.1.1") From 78718242bdbccf4531f2258b324226f6ba28c40f Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 14 Dec 2020 12:00:44 -0800 Subject: [PATCH 79/83] Use masquerade_as_nightly --- tests/testsuite/http_registry.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 92beeccfb1f..7929d3262da 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -16,8 +16,7 @@ use std::path::Path; fn cargo(p: &cargo_test_support::Project, s: &str) -> cargo_test_support::Execs { let mut e = p.cargo(s); - e.arg("-Zhttp-registry") - .env("__CARGO_TEST_CHANNEL_OVERRIDE_DO_NOT_USE_THIS", "nightly"); + e.arg("-Zhttp-registry").masquerade_as_nightly_cargo(); e } From ee333d355b95736afaa30ed33f31c0563c16cca9 Mon Sep 17 
00:00:00 2001 From: Jon Gjengset Date: Mon, 14 Dec 2020 12:01:18 -0800 Subject: [PATCH 80/83] Suggest correct -Z name --- src/bin/cargo/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/cargo/cli.rs b/src/bin/cargo/cli.rs index e232c553916..b804e8d9c4d 100644 --- a/src/bin/cargo/cli.rs +++ b/src/bin/cargo/cli.rs @@ -44,7 +44,7 @@ Available unstable (nightly-only) flags: -Z terminal-width -- Provide a terminal width to rustc for error truncation -Z namespaced-features -- Allow features with `dep:` prefix -Z weak-dep-features -- Allow `dep_name?/feature` feature syntax - -Z http-registries -- Support HTTP-based crate registries + -Z http-registry -- Support HTTP-based crate registries Run with 'cargo -Z [FLAG] [SUBCOMMAND]'" ); From f1f63cc292163c7e158e10a63288a2c1e06bc001 Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 14 Dec 2020 12:08:18 -0800 Subject: [PATCH 81/83] Make sparse+ an impl detail of RegistrySource This also re-enables the requirement of -Z http-registry --- src/cargo/core/source/source_id.rs | 5 +---- src/cargo/ops/registry.rs | 3 +-- src/cargo/sources/registry/mod.rs | 30 +++++++++++++++++------------- tests/testsuite/search.rs | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 5331e51e454..e91f21a9c18 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -280,14 +280,11 @@ impl SourceId { }; Ok(Box::new(PathSource::new(&path, self, config))) } - SourceKind::Registry if self.url().scheme().starts_with("sparse+") => Ok(Box::new( - RegistrySource::rfc_http(self, yanked_whitelist, config), - )), SourceKind::Registry => Ok(Box::new(RegistrySource::remote( self, yanked_whitelist, config, - ))), + )?)), SourceKind::LocalRegistry => { let path = match self.inner.url.to_file_path() { Ok(p) => p, diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs index 
b4ff426827a..9e0d1809d9b 100644 --- a/src/cargo/ops/registry.rs +++ b/src/cargo/ops/registry.rs @@ -406,10 +406,9 @@ fn registry( sid ); } - // TODO: this will probably fail for SourceKind::Http at the moment let api_host = { let _lock = config.acquire_package_cache_lock()?; - let mut src = RegistrySource::remote(sid, &HashSet::new(), config); + let mut src = RegistrySource::remote(sid, &HashSet::new(), config)?; // Only update the index if the config is not available or `force` is set. let cfg = src.config(); let mut updated_cfg = || { diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 2b45174f9e4..f168eccca5c 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -475,24 +475,28 @@ fn short_name(id: SourceId) -> String { } impl<'cfg> RegistrySource<'cfg> { - pub fn rfc_http( - source_id: SourceId, - yanked_whitelist: &HashSet, - config: &'cfg Config, - ) -> RegistrySource<'cfg> { - let name = short_name(source_id); - let ops = http_remote::HttpRegistry::new(source_id, config, &name); - RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) - } - pub fn remote( source_id: SourceId, yanked_whitelist: &HashSet, config: &'cfg Config, - ) -> RegistrySource<'cfg> { + ) -> CargoResult> { let name = short_name(source_id); - let ops = remote::RemoteRegistry::new(source_id, config, &name); - RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) + let ops = if source_id.url().scheme().starts_with("sparse+") { + if !config.cli_unstable().http_registry { + anyhow::bail!("Usage of HTTP-based registries requires `-Z http-registry`"); + } + + Box::new(http_remote::HttpRegistry::new(source_id, config, &name)) as Box<_> + } else { + Box::new(remote::RemoteRegistry::new(source_id, config, &name)) as Box<_> + }; + Ok(RegistrySource::new( + source_id, + config, + &name, + ops, + yanked_whitelist, + )) } pub fn local( diff --git a/tests/testsuite/search.rs 
b/tests/testsuite/search.rs index 0d239b3b455..94ba9fff17c 100644 --- a/tests/testsuite/search.rs +++ b/tests/testsuite/search.rs @@ -150,7 +150,7 @@ fn not_update() { paths::home().join(".cargo"), ); let lock = cfg.acquire_package_cache_lock().unwrap(); - let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg); + let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg).unwrap(); regsrc.update().unwrap(); drop(lock); From 94ba59e2cbcc44b454d8daad188a39eff6df26ca Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 14 Dec 2020 12:13:51 -0800 Subject: [PATCH 82/83] Add test for nightly-only --- tests/testsuite/http_registry.rs | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index 7929d3262da..f5ac48f4a76 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -43,6 +43,46 @@ fn setup() -> RegistryServer { server } +#[cargo_test] +fn not_on_stable() { + let _server = setup(); + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "foo" + version = "0.0.1" + authors = [] + + [dependencies] + bar = ">= 0.0.0" + "#, + ) + .file("src/main.rs", "fn main() {}") + .build(); + + Package::new("bar", "0.0.1").publish(); + + p.cargo("build") + .with_status(101) + .with_stderr(&format!( + "\ +error: failed to prefetch dependencies + +Caused by: + failed to load source for dependency `bar` + +Caused by: + Unable to update registry `https://github.com/rust-lang/crates.io-index` + +Caused by: + Usage of HTTP-based registries requires `-Z http-registry` +" + )) + .run(); +} + #[cargo_test] fn simple() { let server = setup(); From 365e9f22894373d2988615dbd30f9f7ca8f8a9de Mon Sep 17 00:00:00 2001 From: Jon Gjengset Date: Mon, 14 Dec 2020 13:11:40 -0800 Subject: [PATCH 83/83] Do not prefetch w/o -Z http-registry --- src/cargo/core/resolver/mod.rs | 16 +++++++++------- tests/testsuite/bad_config.rs | 8 ++++---- 
tests/testsuite/cargo_features.rs | 2 +- tests/testsuite/directory.rs | 2 +- tests/testsuite/git.rs | 4 ++-- tests/testsuite/git_auth.rs | 6 +++--- tests/testsuite/http_registry.rs | 2 +- tests/testsuite/local_registry.rs | 2 +- tests/testsuite/offline.rs | 2 +- tests/testsuite/path.rs | 2 +- tests/testsuite/registry.rs | 2 +- tests/testsuite/workspaces.rs | 2 +- 12 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index e8b716d0746..e8b19c23794 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -136,13 +136,15 @@ pub fn resolve( }; // First, allow the source to batch pre-fetch dependencies we may need. - registry - .prefetch( - &mut summaries - .iter() - .flat_map(|summary| summary.0.dependencies().iter().map(Cow::Borrowed)), - ) - .chain_err(|| "failed to prefetch dependencies")?; + if config.map_or(false, |c| c.cli_unstable().http_registry) { + registry + .prefetch( + &mut summaries + .iter() + .flat_map(|summary| summary.0.dependencies().iter().map(Cow::Borrowed)), + ) + .chain_err(|| "failed to prefetch dependencies")?; + } let mut registry = RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config); diff --git a/tests/testsuite/bad_config.rs b/tests/testsuite/bad_config.rs index 7387dbe7f81..a71d66c54c7 100644 --- a/tests/testsuite/bad_config.rs +++ b/tests/testsuite/bad_config.rs @@ -368,7 +368,7 @@ fn bad_git_dependency() { .with_stderr( "\ [UPDATING] git repository `file:///` -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `foo` as a dependency of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `foo` @@ -934,7 +934,7 @@ fn bad_source_config2() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `bar` @@ -980,7 +980,7 @@ fn 
bad_source_config3() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `bar` @@ -1028,7 +1028,7 @@ fn bad_source_config4() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.0 ([..])` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/cargo_features.rs b/tests/testsuite/cargo_features.rs index 57ea6ca14b8..02a41c4fde6 100644 --- a/tests/testsuite/cargo_features.rs +++ b/tests/testsuite/cargo_features.rs @@ -199,7 +199,7 @@ fn nightly_feature_requires_nightly_in_dep() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `a` as a dependency of package `b v0.0.1 ([..])` Caused by: failed to load source for dependency `a` diff --git a/tests/testsuite/directory.rs b/tests/testsuite/directory.rs index e4dcdec2258..85a5dd5842c 100644 --- a/tests/testsuite/directory.rs +++ b/tests/testsuite/directory.rs @@ -653,7 +653,7 @@ fn git_override_requires_lockfile() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `git` as a dependency of package `foo v0.0.1 ([..])` Caused by: failed to load source for dependency `git` diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 2a616091cd3..548d7264c2a 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -938,7 +938,7 @@ fn dep_with_bad_submodule() { "\ [UPDATING] git repository [..] 
[UPDATING] git submodule `file://[..]/dep2` -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 [..]` Caused by: failed to load source for dependency `dep1` @@ -2362,7 +2362,7 @@ fn invalid_git_dependency_manifest() { .with_stderr(&format!( "\ [UPDATING] git repository `{}` -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 ([..])` Caused by: failed to load source for dependency `dep1` diff --git a/tests/testsuite/git_auth.rs b/tests/testsuite/git_auth.rs index e2d69cd5ac6..85702290af7 100644 --- a/tests/testsuite/git_auth.rs +++ b/tests/testsuite/git_auth.rs @@ -137,7 +137,7 @@ fn http_auth_offered() { .with_stderr_contains(&format!( "\ [UPDATING] git repository `http://{addr}/foo/bar` -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 [..]` Caused by: failed to load source for dependency `bar` @@ -299,7 +299,7 @@ fn net_err_suggests_fetch_with_cli() { [UPDATING] git repository `ssh://needs-proxy.invalid/git` warning: spurious network error[..] warning: spurious network error[..] 
-[ERROR] failed to prefetch dependencies +[ERROR] failed to get `foo` as a dependency of package `foo v0.0.0 [..]` Caused by: failed to load source for dependency `foo` @@ -368,7 +368,7 @@ fn instead_of_url_printed() { .with_stderr(&format!( "\ [UPDATING] git repository `https://foo.bar/foo/bar` -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/http_registry.rs b/tests/testsuite/http_registry.rs index f5ac48f4a76..e639b7a69dd 100644 --- a/tests/testsuite/http_registry.rs +++ b/tests/testsuite/http_registry.rs @@ -68,7 +68,7 @@ fn not_on_stable() { .with_status(101) .with_stderr(&format!( "\ -error: failed to prefetch dependencies +error: failed to get `bar` as a dependency of package `foo v0.0.1 ([..])` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/local_registry.rs b/tests/testsuite/local_registry.rs index d1de7c9e646..485ec89dcb9 100644 --- a/tests/testsuite/local_registry.rs +++ b/tests/testsuite/local_registry.rs @@ -359,7 +359,7 @@ fn invalid_dir_bad() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.0.1 [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/offline.rs b/tests/testsuite/offline.rs index afbcbfef822..a5505cff781 100644 --- a/tests/testsuite/offline.rs +++ b/tests/testsuite/offline.rs @@ -270,7 +270,7 @@ fn cargo_compile_forbird_git_httpsrepo_offline() { .build(); p.cargo("build --offline").with_status(101).with_stderr("\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `dep1` as a dependency of package `foo v0.5.0 [..]` Caused by: failed to load source for dependency `dep1` diff --git a/tests/testsuite/path.rs b/tests/testsuite/path.rs index e09cae89af5..d0f380156f2 100644 --- a/tests/testsuite/path.rs +++ 
b/tests/testsuite/path.rs @@ -511,7 +511,7 @@ fn error_message_for_missing_manifest() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `bar` as a dependency of package `foo v0.5.0 [..]` Caused by: failed to load source for dependency `bar` diff --git a/tests/testsuite/registry.rs b/tests/testsuite/registry.rs index 9643f88e0ef..acba4a2c413 100644 --- a/tests/testsuite/registry.rs +++ b/tests/testsuite/registry.rs @@ -1562,7 +1562,7 @@ fn disallow_network() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `foo` as a dependency of package `bar v0.5.0 [..]` Caused by: failed to load source for dependency `foo` diff --git a/tests/testsuite/workspaces.rs b/tests/testsuite/workspaces.rs index 9b0a5b3b20b..a20ef11ba70 100644 --- a/tests/testsuite/workspaces.rs +++ b/tests/testsuite/workspaces.rs @@ -2302,7 +2302,7 @@ fn invalid_missing() { .with_status(101) .with_stderr( "\ -[ERROR] failed to prefetch dependencies +[ERROR] failed to get `x` as a dependency of package `foo v0.1.0 [..]` Caused by: failed to load source for dependency `x`