From 23ee20469afd1f003bd7afa313494f426b78a665 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 6 May 2019 11:35:17 -0700 Subject: [PATCH 1/2] Fix skipping over invalid registry packages This accidentally regressed in the previous caching PR for Cargo. Invalid lines of JSON in the registry are intended to be skipped over, but when skipping we forgot to update some indices which meant that all future versions would fail to parse as well! --- src/cargo/sources/registry/index.rs | 2 +- tests/testsuite/registry.rs | 28 ++++++++++++++++++++++++++++ tests/testsuite/support/registry.rs | 18 +++++++++++++++--- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index f30ac76ea2a..3fc7e4ddfb5 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -520,6 +520,7 @@ impl Summaries { // interpretation of each line here and older cargo will simply // ignore the new lines. let line = &contents[start..end]; + start = end + 1; let summary = match IndexSummary::parse(line, source_id) { Ok(summary) => summary, Err(e) => { @@ -530,7 +531,6 @@ impl Summaries { let version = summary.summary.package_id().version().clone(); cache.versions.push((version.clone(), line)); ret.versions.insert(version, summary.into()); - start = end + 1; } if let Some(index_version) = index_version { cache_bytes = Some(cache.serialize(index_version)); diff --git a/tests/testsuite/registry.rs b/tests/testsuite/registry.rs index c99f081092d..3413dc92304 100644 --- a/tests/testsuite/registry.rs +++ b/tests/testsuite/registry.rs @@ -1953,3 +1953,31 @@ fn rename_deps_and_features() { p.cargo("build --features bar/foo01").run(); p.cargo("build --features bar/another").run(); } + +#[test] +fn ignore_invalid_json_lines() { + Package::new("foo", "0.1.0").publish(); + Package::new("foo", "0.1.1") + .invalid_json(true) + .publish(); + Package::new("foo", "0.2.0").publish(); + + let p = project() + .file( + "Cargo.toml", + r#" + [project] + name = "a" + version = "0.5.0" + authors = [] + + [dependencies] + foo = '0.1.0' + foo02 = { version = '0.2.0', package = 'foo' } + "#, + ) + .file("src/lib.rs", "") + .build(); + + p.cargo("build").run(); +} diff --git a/tests/testsuite/support/registry.rs b/tests/testsuite/support/registry.rs index db796301e81..023ece91320 100644 --- a/tests/testsuite/support/registry.rs +++ b/tests/testsuite/support/registry.rs @@ -7,8 +7,6 @@ use cargo::sources::CRATES_IO_INDEX; use cargo::util::Sha256; use flate2::write::GzEncoder; use flate2::Compression; -use git2; -use hex; use tar::{Builder, Header}; use url::Url; @@ -137,6 +135,7 @@ pub struct Package { features: HashMap>, local: bool, alternative: bool, + invalid_json: bool, } #[derive(Clone)] @@ -232,6 +231,7 @@ impl Package { features: HashMap::new(), local: false, alternative: false, + invalid_json: false, } } @@ -342,6 +342,13 @@ impl Package { self } + /// Causes the JSON line emitted in the index to be invalid, presumably + /// causing Cargo to skip over this version. + pub fn invalid_json(&mut self, invalid: bool) -> &mut Package { + self.invalid_json = invalid; + self + } + /// Creates the package and place it in the registry. /// /// This does not actually use Cargo's publishing system, but instead @@ -384,8 +391,13 @@ impl Package { t!(t!(File::open(&self.archive_dst())).read_to_end(&mut c)); cksum(&c) }; + let name = if self.invalid_json { + serde_json::json!(1) + } else { + serde_json::json!(self.name) + }; let line = serde_json::json!({ - "name": self.name, + "name": name, "vers": self.vers, "deps": deps, "cksum": cksum, From 71c01fe9891a341cae76283d9a71b866bf3241be Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 6 May 2019 11:41:18 -0700 Subject: [PATCH 2/2] Make usage of `memchr` more robust Use an iterator adaptor which is tasked with keeping track of indices for us so we don't have to indice juggle elsewhere. --- src/cargo/sources/registry/index.rs | 46 +++++++++++++++++++---------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 3fc7e4ddfb5..bcd7e5af05c 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -490,7 +490,7 @@ impl Summaries { if cfg!(debug_assertions) { cache_contents = Some(s.raw_data); } else { - return Ok(Some(s)) + return Ok(Some(s)); } } Err(e) => { @@ -512,15 +512,12 @@ impl Summaries { ret.raw_data = contents.to_vec(); let mut cache = SummariesCache::default(); hit_closure = true; - let mut start = 0; - for end in memchr::Memchr::new(b'\n', contents) { + for line in split(contents, b'\n') { // Attempt forwards-compatibility on the index by ignoring // everything that we ourselves don't understand, that should // allow future cargo implementations to break the // interpretation of each line here and older cargo will simply // ignore the new lines. - let line = &contents[start..end]; - start = end + 1; let summary = match IndexSummary::parse(line, source_id) { Ok(summary) => summary, Err(e) => { @@ -635,10 +632,8 @@ impl<'a> SummariesCache<'a> { if *first_byte != CURRENT_CACHE_VERSION { failure::bail!("looks like a different Cargo's cache, bailing out"); } - let mut iter = memchr::Memchr::new(0, rest); - let mut start = 0; - if let Some(end) = iter.next() { - let update = &rest[start..end]; + let mut iter = split(rest, 0); + if let Some(update) = iter.next() { if update != last_index_update.as_bytes() { failure::bail!( "cache out of date: current index ({}) != cache ({})", @@ -646,19 +641,15 @@ impl<'a> SummariesCache<'a> { str::from_utf8(update)?, ) } - start = end + 1; } else { failure::bail!("malformed file"); } let mut ret = SummariesCache::default(); - while let Some(version_end) = iter.next() { - let version = &rest[start..version_end]; + while let Some(version) = iter.next() { let version = str::from_utf8(version)?; let version = Version::parse(version)?; - let summary_end = iter.next().unwrap(); - let summary = &rest[version_end + 1..summary_end]; + let summary = iter.next().unwrap(); ret.versions.push((version, summary)); - start = summary_end + 1; } Ok(ret) } @@ -740,3 +731,28 @@ impl IndexSummary { }) } } + +fn split<'a>(haystack: &'a [u8], needle: u8) -> impl Iterator + 'a { + struct Split<'a> { + haystack: &'a [u8], + needle: u8, + } + + impl<'a> Iterator for Split<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<&'a [u8]> { + if self.haystack.is_empty() { + return None; + } + let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) { + Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]), + None => (self.haystack, &[][..]), + }; + self.haystack = remaining; + Some(ret) + } + } + + Split { haystack, needle } +}