From 4be3614264c28487091ed7f8cb8af30d33f41f16 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 4 Jan 2024 10:01:34 -0500 Subject: [PATCH 1/5] test(cargo-update): `--precise` not work with tag or short id This demonstrates the bug described in 13188 --- tests/testsuite/update.rs | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs index 40bc0b476a0..0d233aa2f40 100644 --- a/tests/testsuite/update.rs +++ b/tests/testsuite/update.rs @@ -1272,3 +1272,70 @@ rustdns.workspace = true ) .run(); } + +#[cargo_test] +fn update_precise_git_revisions() { + let (git_project, git_repo) = git::new_repo("git", |p| { + p.file("Cargo.toml", &basic_lib_manifest("git")) + .file("src/lib.rs", "") + }); + let tag_name = "Nazgûl"; + git::tag(&git_repo, tag_name); + + git_project.change_file("src/lib.rs", "fn f() {}"); + git::add(&git_repo); + let head_id = git::commit(&git_repo).to_string(); + let short_id = &head_id[..8]; + let url = git_project.url(); + + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + git = {{ git = '{url}' }} + "# + ), + ) + .file("src/lib.rs", "") + .build(); + + p.cargo("fetch") + .with_stderr(format!("[UPDATING] git repository `{url}`")) + .run(); + + assert!(p.read_lockfile().contains(&head_id)); + + p.cargo("update git --precise") + .arg(tag_name) + .with_status(101) + .with_stderr(format!( + "\ +[ERROR] Unable to update {url}#{tag_name} + +Caused by: + precise value for git is not a git revision: {tag_name} + +Caused by: +[..]" + )) + .run(); + + p.cargo("update git --precise") + .arg(short_id) + .with_status(101) + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[ERROR] Unable to update {url}#{short_id} + +Caused by: + object not found - no match for id ({short_id}00000000000000[..]); [..]", + )) + .run(); +} From 2734097f125f374b63002e4a76e5a7da4be182c4 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 4 Jan 2024 11:00:09 -0500 Subject: [PATCH 2/5] fix(git): represent deferred and resolved revision with an enum `cargo update --precise` might pass in any arbitrary Git reference, and `git2::Oid::from_str` would always zero-pad the given str if it is not a full SHA hex string. This introduces an enum `Revision` to represent `locked_rev` that is either deferred or resolved to an actual object ID. When `locked_rev` is a short ID or any other Git reference, Cargo first performs a Git fetch to resolve it (without --offline), and then locks it to an actual commit object ID. --- src/cargo/core/source_id.rs | 17 ++----- src/cargo/sources/git/source.rs | 81 ++++++++++++++++++++++++++------- src/cargo/sources/git/utils.rs | 16 +------ tests/testsuite/git.rs | 6 +-- tests/testsuite/update.rs | 37 ++++++++++----- 5 files changed, 99 insertions(+), 58 deletions(-) diff --git a/src/cargo/core/source_id.rs b/src/cargo/core/source_id.rs index 3b1cad94211..6e6d9d9d84b 100644 --- a/src/cargo/core/source_id.rs +++ b/src/cargo/core/source_id.rs @@ -460,21 +460,14 @@ impl SourceId { } pub fn precise_git_fragment(self) -> Option<&'static str> { - match &self.inner.precise { - Some(Precise::GitUrlFragment(s)) => Some(&s[..8]), - _ => None, - } + self.precise_full_git_fragment().map(|s| &s[..8]) } - pub fn precise_git_oid(self) -> CargoResult> { - Ok(match self.inner.precise.as_ref() { - Some(Precise::GitUrlFragment(s)) => { - Some(git2::Oid::from_str(s).with_context(|| { - format!("precise value for git is not a git revision: {}", s) - })?) - } + pub fn precise_full_git_fragment(self) -> Option<&'static str> { + match &self.inner.precise { + Some(Precise::GitUrlFragment(s)) => Some(&s), _ => None, - }) + } } /// Creates a new `SourceId` from this source with the given `precise`. diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 7d303f0c25f..3be8fc4567f 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -71,8 +71,9 @@ pub struct GitSource<'cfg> { /// The Git reference from the manifest file. manifest_reference: GitReference, /// The revision which a git source is locked to. - /// This is expected to be set after the Git repository is fetched. - locked_rev: Option, + /// + /// Expected to always be [`Revision::Locked`] after the Git repository is fetched. + locked_rev: Revision, /// The unique identifier of this source. source_id: SourceId, /// The underlying path source to discover packages inside the Git repository. @@ -103,7 +104,11 @@ impl<'cfg> GitSource<'cfg> { let remote = GitRemote::new(source_id.url()); let manifest_reference = source_id.git_reference().unwrap().clone(); - let locked_rev = source_id.precise_git_oid()?; + let locked_rev = source_id + .precise_full_git_fragment() + .map(|s| Revision::new(s.into())) + .unwrap_or_else(|| source_id.git_reference().unwrap().clone().into()); + let ident = ident_shallow( &source_id, config @@ -155,6 +160,49 @@ impl<'cfg> GitSource<'cfg> { } } +/// Indicates a [Git revision] that might be locked or deferred to be resolved. +/// +/// [Git revision]: https://git-scm.com/docs/revisions +#[derive(Clone, Debug)] +enum Revision { + /// A [Git reference] that would trigger extra fetches when being resolved. + /// + /// [Git reference]: https://git-scm.com/book/en/v2/Git-Internals-Git-References + Deferred(GitReference), + /// A locked revision of the actual Git commit object ID. + Locked(git2::Oid), +} + +impl Revision { + fn new(rev: &str) -> Revision { + let oid = git2::Oid::from_str(rev).ok(); + match oid { + // Git object ID is supposed to be a hex string of 20 (SHA1) or 32 (SHA256) bytes. + // Its length must be double to the underlying bytes (40 or 64), + // otherwise libgit2 would happily zero-pad the returned oid. + // See rust-lang/cargo#13188 + Some(oid) if oid.as_bytes().len() * 2 == rev.len() => Revision::Locked(oid), + _ => Revision::Deferred(GitReference::Rev(rev.to_string())), + } + } +} + +impl From for Revision { + fn from(value: GitReference) -> Self { + Revision::Deferred(value) + } +} + +impl From for GitReference { + fn from(value: Revision) -> Self { + match value { + Revision::Deferred(git_ref) => git_ref, + Revision::Locked(oid) => GitReference::Rev(oid.to_string()), + } + } +} + + /// Create an identifier from a URL, /// essentially turning `proto://host/path/repo` into `repo-`. fn ident(id: &SourceId) -> String { @@ -252,16 +300,17 @@ impl<'cfg> Source for GitSource<'cfg> { let db_path = db_path.into_path_unlocked(); let db = self.remote.db_at(&db_path).ok(); - let (db, actual_rev) = match (self.locked_rev, db) { + + let (db, actual_rev) = match (&self.locked_rev, db) { // If we have a locked revision, and we have a preexisting database // which has that revision, then no update needs to happen. - (Some(rev), Some(db)) if db.contains(rev) => (db, rev), + (Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid), // If we're in offline mode, we're not locked, and we have a // database, then try to resolve our reference with the preexisting // repository. - (None, Some(db)) if self.config.offline() => { - let rev = db.resolve(&self.manifest_reference).with_context(|| { + (Revision::Deferred(git_ref), Some(db)) if self.config.offline() => { + let rev = db.resolve(&git_ref).with_context(|| { "failed to lookup reference in preexisting repository, and \ can't check for updates in offline mode (--offline)" })?; @@ -279,6 +328,7 @@ impl<'cfg> Source for GitSource<'cfg> { self.remote.url() ); } + if !self.quiet { self.config.shell().status( "Updating", @@ -288,13 +338,9 @@ impl<'cfg> Source for GitSource<'cfg> { trace!("updating git source `{:?}`", self.remote); - self.remote.checkout( - &db_path, - db, - &self.manifest_reference, - locked_rev, - self.config, - )? + let locked_rev = locked_rev.clone().into(); + self.remote + .checkout(&db_path, db, &locked_rev, self.config)? } }; @@ -321,7 +367,7 @@ impl<'cfg> Source for GitSource<'cfg> { self.path_source = Some(path_source); self.short_id = Some(short_id.as_str().into()); - self.locked_rev = Some(actual_rev); + self.locked_rev = Revision::Locked(actual_rev); self.path_source.as_mut().unwrap().update()?; // Hopefully this shouldn't incur too much of a performance hit since @@ -350,7 +396,10 @@ impl<'cfg> Source for GitSource<'cfg> { } fn fingerprint(&self, _pkg: &Package) -> CargoResult { - Ok(self.locked_rev.as_ref().unwrap().to_string()) + match &self.locked_rev { + Revision::Locked(oid) => Ok(oid.to_string()), + _ => unreachable!("locked_rev must be resolved when computing fingerprint"), + } } fn describe(&self) -> String { diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 7ce43d9bd1f..20ef6ddedda 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -95,8 +95,6 @@ impl GitRemote { /// This ensures that it gets the up-to-date commit when a named reference /// is given (tag, branch, refs/*). Thus, network connection is involved. /// - /// When `locked_rev` is provided, it takes precedence over `reference`. - /// /// If we have a previous instance of [`GitDatabase`] then fetch into that /// if we can. If that can successfully load our revision then we've /// populated the database with the latest version of `reference`, so @@ -106,11 +104,8 @@ impl GitRemote { into: &Path, db: Option, reference: &GitReference, - locked_rev: Option, cargo_config: &Config, ) -> CargoResult<(GitDatabase, git2::Oid)> { - let locked_ref = locked_rev.map(|oid| GitReference::Rev(oid.to_string())); - let reference = locked_ref.as_ref().unwrap_or(reference); if let Some(mut db) = db { fetch( &mut db.repo, @@ -121,11 +116,7 @@ impl GitRemote { ) .with_context(|| format!("failed to fetch into: {}", into.display()))?; - let resolved_commit_hash = match locked_rev { - Some(rev) => db.contains(rev).then_some(rev), - None => resolve_ref(reference, &db.repo).ok(), - }; - if let Some(rev) = resolved_commit_hash { + if let Some(rev) = resolve_ref(reference, &db.repo).ok() { return Ok((db, rev)); } } @@ -146,10 +137,7 @@ impl GitRemote { RemoteKind::GitDependency, ) .with_context(|| format!("failed to clone into: {}", into.display()))?; - let rev = match locked_rev { - Some(rev) => rev, - None => resolve_ref(reference, &repo)?, - }; + let rev = resolve_ref(reference, &repo)?; Ok(( GitDatabase { diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index d9289acc6ba..7ad04e6805e 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -757,13 +757,11 @@ fn update_with_shared_deps() { .with_status(101) .with_stderr( "\ +[UPDATING] git repository [..] [ERROR] Unable to update [..] Caused by: - precise value for git is not a git revision: 0.1.2 - -Caused by: - unable to parse OID - contains invalid characters; class=Invalid (3) + revspec '0.1.2' not found; class=Reference (4); code=NotFound (-3) ", ) .run(); diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs index 0d233aa2f40..7ef9ca97edc 100644 --- a/tests/testsuite/update.rs +++ b/tests/testsuite/update.rs @@ -1281,6 +1281,7 @@ fn update_precise_git_revisions() { }); let tag_name = "Nazgûl"; git::tag(&git_repo, tag_name); + let tag_commit_id = git_repo.head().unwrap().target().unwrap().to_string(); git_project.change_file("src/lib.rs", "fn f() {}"); git::add(&git_repo); @@ -1313,29 +1314,41 @@ fn update_precise_git_revisions() { p.cargo("update git --precise") .arg(tag_name) - .with_status(101) .with_stderr(format!( "\ -[ERROR] Unable to update {url}#{tag_name} - -Caused by: - precise value for git is not a git revision: {tag_name} - -Caused by: -[..]" +[UPDATING] git repository `{url}` +[UPDATING] git v0.5.0 ([..]) -> #{}", + &tag_commit_id[..8], )) .run(); + assert!(p.read_lockfile().contains(&tag_commit_id)); + assert!(!p.read_lockfile().contains(&head_id)); + p.cargo("update git --precise") .arg(short_id) - .with_status(101) .with_stderr(format!( "\ [UPDATING] git repository `{url}` -[ERROR] Unable to update {url}#{short_id} +[UPDATING] git v0.5.0 ([..]) -> #{short_id}", + )) + .run(); -Caused by: - object not found - no match for id ({short_id}00000000000000[..]); [..]", + assert!(p.read_lockfile().contains(&head_id)); + assert!(!p.read_lockfile().contains(&tag_commit_id)); + + // updating back to tag still requires a git fetch, + // as the ref may change over time. + p.cargo("update git --precise") + .arg(tag_name) + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[UPDATING] git v0.5.0 ([..]) -> #{}", + &tag_commit_id[..8], )) .run(); + + assert!(p.read_lockfile().contains(&tag_commit_id)); + assert!(!p.read_lockfile().contains(&head_id)); } From ce551d73b86759c924376c7ea15cf5d17f58e833 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 4 Jan 2024 16:00:06 -0500 Subject: [PATCH 3/5] refactor(git): remove `manifest_reference` from GitSource --- src/cargo/sources/git/source.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 3be8fc4567f..8cfd520cddd 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -68,8 +68,6 @@ use url::Url; pub struct GitSource<'cfg> { /// The git remote which we're going to fetch from. remote: GitRemote, - /// The Git reference from the manifest file. - manifest_reference: GitReference, /// The revision which a git source is locked to. /// /// Expected to always be [`Revision::Locked`] after the Git repository is fetched. @@ -103,7 +101,7 @@ impl<'cfg> GitSource<'cfg> { assert!(source_id.is_git(), "id is not git, id={}", source_id); let remote = GitRemote::new(source_id.url()); - let manifest_reference = source_id.git_reference().unwrap().clone(); + // Fallback to git ref from mainfest if there is no locked revision. let locked_rev = source_id .precise_full_git_fragment() .map(|s| Revision::new(s.into())) @@ -119,7 +117,6 @@ impl<'cfg> GitSource<'cfg> { let source = GitSource { remote, - manifest_reference, locked_rev, source_id, path_source: None, @@ -239,9 +236,12 @@ impl<'cfg> Debug for GitSource<'cfg> { // TODO(-Znext-lockfile-bump): set it to true when stabilizing // lockfile v4, because we want Source ID serialization to be // consistent with lockfile. - match self.manifest_reference.pretty_ref(false) { - Some(s) => write!(f, " ({})", s), - None => Ok(()), + match &self.locked_rev { + Revision::Deferred(git_ref) => match git_ref.pretty_ref(false) { + Some(s) => write!(f, " ({})", s), + None => Ok(()), + }, + Revision::Locked(oid) => write!(f, " ({oid})"), } } } From 093f7c837026a9f75cced4b637cfafef51c92a2a Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 4 Jan 2024 16:29:56 -0500 Subject: [PATCH 4/5] refactor: rename precise_full_git_fragment to precise_git_fragment --- src/cargo/core/source_id.rs | 4 ---- src/cargo/ops/cargo_generate_lockfile.rs | 2 +- src/cargo/sources/git/source.rs | 3 +-- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/cargo/core/source_id.rs b/src/cargo/core/source_id.rs index 6e6d9d9d84b..1c4982ceb9c 100644 --- a/src/cargo/core/source_id.rs +++ b/src/cargo/core/source_id.rs @@ -460,10 +460,6 @@ impl SourceId { } pub fn precise_git_fragment(self) -> Option<&'static str> { - self.precise_full_git_fragment().map(|s| &s[..8]) - } - - pub fn precise_full_git_fragment(self) -> Option<&'static str> { match &self.inner.precise { Some(Precise::GitUrlFragment(s)) => Some(&s), _ => None, diff --git a/src/cargo/ops/cargo_generate_lockfile.rs b/src/cargo/ops/cargo_generate_lockfile.rs index 1bba64925ec..e638c69390f 100644 --- a/src/cargo/ops/cargo_generate_lockfile.rs +++ b/src/cargo/ops/cargo_generate_lockfile.rs @@ -167,7 +167,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes format!( "{} -> #{}", removed[0], - &added[0].source_id().precise_git_fragment().unwrap() + &added[0].source_id().precise_git_fragment().unwrap()[..8], ) } else { format!("{} -> v{}", removed[0], added[0].version()) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 8cfd520cddd..c8f14737c0a 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -103,7 +103,7 @@ impl<'cfg> GitSource<'cfg> { let remote = GitRemote::new(source_id.url()); // Fallback to git ref from mainfest if there is no locked revision. let locked_rev = source_id - .precise_full_git_fragment() + .precise_git_fragment() .map(|s| Revision::new(s.into())) .unwrap_or_else(|| source_id.git_reference().unwrap().clone().into()); @@ -199,7 +199,6 @@ impl From for GitReference { } } - /// Create an identifier from a URL, /// essentially turning `proto://host/path/repo` into `repo-`. fn ident(id: &SourceId) -> String { From e6f2dfd4524ae50eaf6c243077aae3b72a545c1e Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Thu, 4 Jan 2024 17:43:19 -0500 Subject: [PATCH 5/5] test(cargo-update): verify a oid-like tag triggers a re-fetch --- tests/testsuite/update.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs index 7ef9ca97edc..93299e910dd 100644 --- a/tests/testsuite/update.rs +++ b/tests/testsuite/update.rs @@ -1351,4 +1351,22 @@ fn update_precise_git_revisions() { assert!(p.read_lockfile().contains(&tag_commit_id)); assert!(!p.read_lockfile().contains(&head_id)); + + // Now make a tag looks like an oid. + // It requires a git fetch, as the oid cannot be found in preexisting git db. + let arbitrary_tag: String = std::iter::repeat('a').take(head_id.len()).collect(); + git::tag(&git_repo, &arbitrary_tag); + + p.cargo("update git --precise") + .arg(&arbitrary_tag) + .with_stderr(format!( + "\ +[UPDATING] git repository `{url}` +[UPDATING] git v0.5.0 ([..]) -> #{}", + &head_id[..8], + )) + .run(); + + assert!(p.read_lockfile().contains(&head_id)); + assert!(!p.read_lockfile().contains(&tag_commit_id)); }