From 66a6aa9055eff0704938f797855bb37c3d630e88 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Sat, 28 Sep 2024 20:39:41 +0000 Subject: [PATCH 1/3] Add a test to detect race condition with forked git repos When installing packages from different directories of a forked monorepo, a race condition may occur, where multiple git clients are interacting in parallel with the same git repository. Add a failing test to detect the race condition. Mark as xfail. This test succeeds due to a problem with the poetry test framework. Example pyproject.toml which triggers the problem: ```toml [tool.poetry] name = "some_other_repo" version = "0.1.0" description = "" authors = ["Jonathan Rayner "] packages = [{ include = "some_other_repo"}] [tool.poetry.dependencies] python = "^3.10 <3.13" pkg_1 = {git = "git@github.com:JonathanRayner/some_monorepo.git", subdirectory = "pkg_1" } pkg_2 = {git = "git@github.com:gustavgransbo/some_monorepo.git", subdirectory = "pkg_2", tag="v2"} [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" ``` Source: @gustavgransbo https://github.com/orgs/python-poetry/discussions/9718#discussioncomment-10785589 --- .../subdirectories/one-copy/one/__init__.py | 0 .../subdirectories/one-copy/pyproject.toml | 13 ++++++ .../subdirectories/one/one/__init__.py | 0 .../subdirectories/one/pyproject.toml | 13 ++++++ .../subdirectories/two/pyproject.toml | 13 ++++++ .../subdirectories/two/two/__init__.py | 0 tests/installation/test_executor.py | 43 +++++++++++++++++++ 7 files changed, 82 insertions(+) create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/one/__init__.py create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/pyproject.toml create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/one/one/__init__.py create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/one/pyproject.toml create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/two/pyproject.toml create mode 100644 tests/fixtures/git/github.com/forked_demo/subdirectories/two/two/__init__.py diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/one/__init__.py b/tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/one/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/pyproject.toml b/tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/pyproject.toml new file mode 100644 index 00000000000..1548c3a33a1 --- /dev/null +++ b/tests/fixtures/git/github.com/forked_demo/subdirectories/one-copy/pyproject.toml @@ -0,0 +1,13 @@ +[tool.poetry] +name = "one" +version = "1.0.0" +description = "Some description." +authors = [] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.7" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/one/one/__init__.py b/tests/fixtures/git/github.com/forked_demo/subdirectories/one/one/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/one/pyproject.toml b/tests/fixtures/git/github.com/forked_demo/subdirectories/one/pyproject.toml new file mode 100644 index 00000000000..1548c3a33a1 --- /dev/null +++ b/tests/fixtures/git/github.com/forked_demo/subdirectories/one/pyproject.toml @@ -0,0 +1,13 @@ +[tool.poetry] +name = "one" +version = "1.0.0" +description = "Some description." +authors = [] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.7" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/two/pyproject.toml b/tests/fixtures/git/github.com/forked_demo/subdirectories/two/pyproject.toml new file mode 100644 index 00000000000..6a54d8938ff --- /dev/null +++ b/tests/fixtures/git/github.com/forked_demo/subdirectories/two/pyproject.toml @@ -0,0 +1,13 @@ +[tool.poetry] +name = "two" +version = "2.0.0" +description = "Some description." +authors = [] +license = "MIT" + +[tool.poetry.dependencies] +python = "~2.7 || ^3.4" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/tests/fixtures/git/github.com/forked_demo/subdirectories/two/two/__init__.py b/tests/fixtures/git/github.com/forked_demo/subdirectories/two/two/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index c25d05e0b67..ce0f82db284 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -1137,6 +1137,49 @@ def test_executor_should_install_multiple_packages_from_same_git_repository( assert archive_arg == tmp_venv.path / "src/demo/subdirectories/package_b" +@pytest.mark.xfail +def test_executor_should_install_multiple_packages_from_forked_git_repository( + mocker: MockerFixture, + tmp_venv: VirtualEnv, + pool: RepositoryPool, + config: Config, + artifact_cache: ArtifactCache, + io: BufferedIO, + wheel: Path, +) -> None: + package_a = Package( + "one", + "1.0.0", + source_type="git", + source_reference="master", + source_resolved_reference="123456", + source_url="https://github.com/demo/subdirectories.git", + source_subdirectory="one", + ) + package_b = Package( + "two", + "2.0.0", + source_type="git", + source_reference="master", + source_resolved_reference="123456", + source_url="https://github.com/forked_demo/subdirectories.git", + source_subdirectory="two", + ) + + chef = Chef(artifact_cache, tmp_venv, Factory.create_pool(config)) + chef.set_directory_wheel(wheel) + prepare_spy = mocker.spy(chef, "prepare") + + executor = Executor(tmp_venv, pool, config, io) + executor._chef = chef + executor.execute([Install(package_a), Install(package_b)]) + + # Verify that the repo for package_a is not re-used for package_b. + # both repos must be cloned serially into separate directories. + # If so, executor.prepare() will be called twice. + assert prepare_spy.call_count == 2 + + def test_executor_should_write_pep610_url_references_for_git_with_subdirectories( tmp_venv: VirtualEnv, pool: RepositoryPool, From 71b847871a3531485580c894a1d92d6ed007f0a6 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Sun, 29 Sep 2024 15:07:42 +0000 Subject: [PATCH 2/3] MockDulwichRepo: mimick git behaviour more closely When git clones a repo in a base directory, it will place the clone in a subdirectory of the base. That subdirectory is named after the repo url name, which is the "testing" right before the .git in https://github.com/test/testing.git This behaviour may introduce race conditions to Poetry. Let the mock repo mimic it, so that the (absence of) race conditions can be verified. --- tests/helpers.py | 3 ++- tests/installation/test_executor.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 491cec88a91..d8211e7fc1a 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -113,7 +113,8 @@ def mock_clone( if not source_root: source_root = Path(Config.create().get("cache-dir")) / "src" - dest = source_root / path + assert parsed.name is not None + dest = source_root / parsed.name dest.mkdir(parents=True, exist_ok=True) copy_path(folder, dest) diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index ce0f82db284..1294ee1a718 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -1091,7 +1091,7 @@ def test_executor_should_append_subdirectory_for_git( executor.execute([Install(package)]) archive_arg = spy.call_args[0][0] - assert archive_arg == tmp_venv.path / "src/demo/subdirectories/two" + assert archive_arg == tmp_venv.path / "src/subdirectories/two" def test_executor_should_install_multiple_packages_from_same_git_repository( @@ -1131,10 +1131,10 @@ def test_executor_should_install_multiple_packages_from_same_git_repository( executor.execute([Install(package_a), Install(package_b)]) archive_arg = spy.call_args_list[0][0][0] - assert archive_arg == tmp_venv.path / "src/demo/subdirectories/package_a" + assert archive_arg == tmp_venv.path / "src/subdirectories/package_a" archive_arg = spy.call_args_list[1][0][0] - assert archive_arg == tmp_venv.path / "src/demo/subdirectories/package_b" + assert archive_arg == tmp_venv.path / "src/subdirectories/package_b" @pytest.mark.xfail From d34faea0b09950a8325f42b5434a44ef03096a44 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Sun, 29 Sep 2024 15:30:42 +0000 Subject: [PATCH 3/3] executor: fix race condition with forked git repos When installing packages from different directories of a forked monorepo, a race condition may occur, where multiple git clients are interacting in parallel with the same git repository. Fix by serializing git operations that interact with the same git repository. This makes the test succeed. Remove xfail. Links: https://github.com/python-poetry/poetry/pull/9658#discussion_r1745925411 https://github.com/orgs/python-poetry/discussions/9718#discussioncomment-10785589 --- src/poetry/installation/executor.py | 14 ++++++++++---- tests/installation/test_executor.py | 1 - 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/poetry/installation/executor.py b/src/poetry/installation/executor.py index 474850011ab..2f5f550a619 100644 --- a/src/poetry/installation/executor.py +++ b/src/poetry/installation/executor.py @@ -33,6 +33,7 @@ from poetry.utils.helpers import remove_directory from poetry.utils.isolated_build import IsolatedBuildError from poetry.utils.isolated_build import IsolatedBuildInstallError +from poetry.vcs.git import Git if TYPE_CHECKING: @@ -46,6 +47,12 @@ from poetry.utils.env import Env +def _package_get_name(package: Package) -> str | None: + if url := package.repository_url: + return Git.get_name_from_source_url(url) + return None + + class Executor: def __init__( self, @@ -167,8 +174,9 @@ def execute(self, operations: list[Operation]) -> int: if is_parallel_unsafe: serial_operations.append(operation) elif operation.package.source_type == "git": - # Git operations on the same repository should be executed serially - serial_git_operations[operation.package.source_url].append( + # Serially execute git operations that get cloned to the same directory, + # to prevent multiple parallel git operations in the same repo. + serial_git_operations[_package_get_name(operation.package)].append( operation ) else: @@ -604,8 +612,6 @@ def _prepare_archive( ) def _prepare_git_archive(self, operation: Install | Update) -> Path: - from poetry.vcs.git import Git - package = operation.package assert package.source_url is not None diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index 1294ee1a718..b9905d6af70 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -1137,7 +1137,6 @@ def test_executor_should_install_multiple_packages_from_same_git_repository( assert archive_arg == tmp_venv.path / "src/subdirectories/package_b" -@pytest.mark.xfail def test_executor_should_install_multiple_packages_from_forked_git_repository( mocker: MockerFixture, tmp_venv: VirtualEnv,