diff --git a/news/9086.feature.rst b/news/9086.feature.rst
new file mode 100644
index 00000000000..7a68189f0b2
--- /dev/null
+++ b/news/9086.feature.rst
@@ -0,0 +1 @@
+When a revision is specified in a Git URL, use git's partial clone feature to speed up source retrieval.
diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py
index 8919aa538dd..32625fdd6b6 100644
--- a/src/pip/_internal/vcs/git.py
+++ b/src/pip/_internal/vcs/git.py
@@ -259,12 +259,21 @@ def fetch_new(self, dest, url, rev_options):
         # type: (str, HiddenText, RevOptions) -> None
         rev_display = rev_options.to_display()
         logger.info('Cloning %s%s to %s', url, rev_display, display_path(dest))
-        self.run_command(make_command('clone', '-q', url, dest))
+        if self.get_git_version() >= (2, 17):
+            # Git added support for partial clone in 2.17
+            # https://git-scm.com/docs/partial-clone
+            # Speeds up cloning by functioning without a complete copy of repository
+            self.run_command(make_command(
+                'clone', '--filter=blob:none', '-q', url, dest,
+            ))
+        else:
+            self.run_command(make_command('clone', '-q', url, dest))
 
         if rev_options.rev:
             # Then a specific revision was requested.
             rev_options = self.resolve_revision(dest, url, rev_options)
             branch_name = getattr(rev_options, 'branch_name', None)
+            logger.debug('Rev options %s, branch_name %s', rev_options, branch_name)
             if branch_name is None:
                 # Only do a checkout if the current commit id doesn't match
                 # the requested revision.
diff --git a/tests/functional/test_vcs_git.py b/tests/functional/test_vcs_git.py
index 450ff96f4b4..c064fa99fee 100644
--- a/tests/functional/test_vcs_git.py
+++ b/tests/functional/test_vcs_git.py
@@ -3,6 +3,7 @@
 """
 
 import os
+from unittest.mock import patch
 
 import pytest
 
@@ -277,3 +278,98 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
     # check we can fetch our commit
     rev_options = Git.make_rev_options(commit)
     Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)
+
+
+def _initialize_clonetest_server(repo_path, script):
+    """Create a one-commit repo at repo_path; return the committed file's path."""
+    repo_path.mkdir()
+    script.run("git", "init", cwd=str(repo_path))
+    repo_file = repo_path / "file.txt"
+    repo_file.write_text(u".")
+    script.run("git", "add", "file.txt", cwd=str(repo_path))
+    script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path))
+    return repo_file
+
+
+@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
+def test_partial_clone(script, tmp_path):
+    """Test partial clone w/ a git-server that supports it"""
+    repo_path = tmp_path / "repo"
+    repo_file = _initialize_clonetest_server(repo_path, script)
+    clone_path1 = repo_path / "clone1"
+    clone_path2 = repo_path / "clone2"
+
+    commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()
+
+    # Enable filtering support on server
+    script.run("git", "config", "uploadpack.allowFilter", "true", cwd=repo_path)
+    script.run("git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_path)
+
+    # Check that we can clone at HEAD
+    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
+    # Check that we can clone to commit
+    Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))
+
+    # Write some additional stuff to git pull
+    repo_file.write_text(u"..")
+    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))
+
+    # Make sure git pull works - with server supporting filtering
+    assert (
+        "warning: filtering not recognized by server, ignoring"
+        not in script.run("git", "pull", cwd=clone_path1).stderr
+    )
+    assert (
+        "warning: filtering not recognized by server, ignoring"
+        not in script.run("git", "pull", cwd=clone_path2).stderr
+    )
+
+
+@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
+def test_partial_clone_without_server_support(script, tmp_path):
+    """Test partial clone w/ a git-server that does not support it"""
+    repo_path = tmp_path / "repo"
+    repo_file = _initialize_clonetest_server(repo_path, script)
+    clone_path1 = repo_path / "clone1"
+    clone_path2 = repo_path / "clone2"
+
+    commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip()
+
+    # Check that we can clone at HEAD
+    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
+    # Check that we can clone to commit
+    Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit))
+
+    # Write some additional stuff to git pull
+    repo_file.write_text(u"..")
+    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))
+
+    # Make sure git pull works - even though server doesn't support filtering
+    assert (
+        "warning: filtering not recognized by server, ignoring"
+        in script.run("git", "pull", cwd=clone_path1).stderr
+    )
+    assert (
+        "warning: filtering not recognized by server, ignoring"
+        in script.run("git", "pull", cwd=clone_path2).stderr
+    )
+
+
+def test_clone_without_partial_clone_support(script, tmp_path):
+    """Older git clients don't support partial clone. Test the fallback path"""
+    repo_path = tmp_path / "repo"
+    repo_file = _initialize_clonetest_server(repo_path, script)
+    clone_path = repo_path / "clone1"
+
+    # Check that we can clone w/ old version of git w/o --filter
+    with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
+        Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())
+
+    repo_file.write_text(u"...")
+    script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))
+
+    # Should work fine w/o attempting to use `--filter` args
+    assert (
+        "warning: filtering not recognized by server, ignoring"
+        not in script.run("git", "pull", cwd=clone_path).stderr
+    )