Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache is_satisfied_by #12453

Merged
merged 5 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/12453.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance of resolution of large dependency trees, with more caching.
12 changes: 6 additions & 6 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,13 +352,13 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.dist!r})"

def __hash__(self) -> int:
return hash((self.__class__, self.name, self.version))
def __eq__(self, other: object) -> bool:
if not isinstance(other, AlreadyInstalledCandidate):
return NotImplemented
return self.name == other.name and self.version == other.version

def __eq__(self, other: Any) -> bool:
if isinstance(other, self.__class__):
return self.name == other.name and self.version == other.version
return False
def __hash__(self) -> int:
return hash((self.name, self.version))

@property
def project_name(self) -> NormalizedName:
Expand Down
4 changes: 3 additions & 1 deletion src/pip/_internal/resolution/resolvelib/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
from typing import (
TYPE_CHECKING,
Callable,
Dict,
FrozenSet,
Iterable,
Expand Down Expand Up @@ -391,6 +392,7 @@ def find_candidates(
incompatibilities: Mapping[str, Iterator[Candidate]],
constraint: Constraint,
prefers_installed: bool,
is_satisfied_by: Callable[[Requirement, Candidate], bool],
) -> Iterable[Candidate]:
# Collect basic lookup information from the requirements.
explicit_candidates: Set[Candidate] = set()
Expand Down Expand Up @@ -456,7 +458,7 @@ def find_candidates(
for c in explicit_candidates
if id(c) not in incompat_ids
and constraint.is_satisfied_by(c)
and all(req.is_satisfied_by(c) for req in requirements[identifier])
and all(is_satisfied_by(req, c) for req in requirements[identifier])
)

def _make_requirements_from_install_req(
Expand Down
3 changes: 3 additions & 0 deletions src/pip/_internal/resolution/resolvelib/provider.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import collections
import math
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Dict,
Expand Down Expand Up @@ -234,8 +235,10 @@ def _eligible_for_upgrade(identifier: str) -> bool:
constraint=constraint,
prefers_installed=(not _eligible_for_upgrade(identifier)),
incompatibilities=incompatibilities,
is_satisfied_by=self.is_satisfied_by,
)

@lru_cache(maxsize=None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to check how much this increases memory consumption for a worst case scenario, i.e. when ResolutionTooDeep is reached.

This set of requirements can reach that sphinx sphinx-rtd-theme sphinx-toolbox myst-parser sphinxcontrib-bibtex nbsphinx. I can test this in the new year if you'd like.

Copy link
Member

@notatallshaw notatallshaw Jan 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I experimented with a few different requirements dry install, looking at apache-airflow[all] I saw an increase of peak memory usage from 298 MBs to 306 MBs, I also saw the time to completion drop from ~2m 41s to ~2m 12s.

Personally this seems significantly worth the memory/time trade off to me (and looking at the memray flamegraph of these installs there are potentially big areas for improvement in other parts of Pip's codebase to use less memory)

def is_satisfied_by(self, requirement: Requirement, candidate: Candidate) -> bool:
return requirement.is_satisfied_by(candidate)

Expand Down
46 changes: 46 additions & 0 deletions src/pip/_internal/resolution/resolvelib/requirements.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from pip._vendor.packaging.specifiers import SpecifierSet
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name

Expand All @@ -17,6 +19,14 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.candidate!r})"

def __hash__(self) -> int:
return hash(self.candidate)

def __eq__(self, other: Any) -> bool:
if not isinstance(other, ExplicitRequirement):
return False
return self.candidate == other.candidate

@property
def project_name(self) -> NormalizedName:
# No need to canonicalize - the candidate did this
Expand Down Expand Up @@ -49,6 +59,14 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"{self.__class__.__name__}({str(self._ireq.req)!r})"

def __eq__(self, other: object) -> bool:
if not isinstance(other, SpecifierRequirement):
return NotImplemented
return str(self._ireq) == str(other._ireq)

def __hash__(self) -> int:
return hash(str(self._ireq))

@property
def project_name(self) -> NormalizedName:
assert self._ireq.req, "Specifier-backed ireq is always PEP 508"
Expand Down Expand Up @@ -98,12 +116,21 @@ def __init__(self, ireq: InstallRequirement) -> None:
self._ireq = install_req_drop_extras(ireq)
self._extras = frozenset(canonicalize_name(e) for e in self._ireq.extras)

def __eq__(self, other: object) -> bool:
if not isinstance(other, SpecifierWithoutExtrasRequirement):
return NotImplemented
return str(self._ireq) == str(other._ireq)

def __hash__(self) -> int:
return hash(str(self._ireq))


class RequiresPythonRequirement(Requirement):
"""A requirement representing Requires-Python metadata."""

def __init__(self, specifier: SpecifierSet, match: Candidate) -> None:
self.specifier = specifier
self._specifier_string = str(specifier) # for faster __eq__
self._candidate = match

def __str__(self) -> str:
Expand All @@ -112,6 +139,17 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"{self.__class__.__name__}({str(self.specifier)!r})"

def __hash__(self) -> int:
return hash((self._specifier_string, self._candidate))

def __eq__(self, other: Any) -> bool:
if not isinstance(other, RequiresPythonRequirement):
return False
return (
self._specifier_string == other._specifier_string
and self._candidate == other._candidate
)

@property
def project_name(self) -> NormalizedName:
return self._candidate.project_name
Expand Down Expand Up @@ -148,6 +186,14 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"{self.__class__.__name__}({str(self._name)!r})"

def __eq__(self, other: object) -> bool:
if not isinstance(other, UnsatisfiableRequirement):
return NotImplemented
return self._name == other._name

def __hash__(self) -> int:
return hash(self._name)

@property
def project_name(self) -> NormalizedName:
return self._name
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/resolution_resolvelib/test_requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@
# Editables


def _is_satisfied_by(requirement: Requirement, candidate: Candidate) -> bool:
"""A helper function to check if a requirement is satisfied by a candidate.

Used for mocking PipProvider.is_satified_by.
"""
return requirement.is_satisfied_by(candidate)


@pytest.fixture
def test_cases(data: TestData) -> Iterator[List[Tuple[str, str, int]]]:
def _data_file(name: str) -> Path:
Expand Down Expand Up @@ -80,6 +88,7 @@ def test_new_resolver_correct_number_of_matches(
{},
Constraint.empty(),
prefers_installed=False,
is_satisfied_by=_is_satisfied_by,
)
assert sum(1 for _ in matches) == match_count

Expand All @@ -98,6 +107,7 @@ def test_new_resolver_candidates_match_requirement(
{},
Constraint.empty(),
prefers_installed=False,
is_satisfied_by=_is_satisfied_by,
)
for c in candidates:
assert isinstance(c, Candidate)
Expand Down
Loading