Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor to make mapping url explicit and eliminate LookupLoader #703

Merged
merged 7 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions conda_lock/conda_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
TimeMeta,
UpdateSpecification,
)
from conda_lock.lookup import set_lookup_location
from conda_lock.lookup import DEFAULT_MAPPING_URL
from conda_lock.models.channel import Channel
from conda_lock.models.lock_spec import LockSpecification
from conda_lock.models.pip_repository import PipRepository
Expand Down Expand Up @@ -270,6 +270,7 @@ def make_lock_files( # noqa: C901
metadata_yamls: Sequence[pathlib.Path] = (),
with_cuda: Optional[str] = None,
strip_auth: bool = False,
mapping_url: str,
) -> None:
"""
Generate a lock file from the src files provided
Expand Down Expand Up @@ -324,6 +325,7 @@ def make_lock_files( # noqa: C901
channel_overrides=channel_overrides,
platform_overrides=platform_overrides,
required_categories=required_categories if filter_categories else None,
mapping_url=mapping_url,
)

# Load existing lockfile if it exists
Expand Down Expand Up @@ -403,6 +405,7 @@ def make_lock_files( # noqa: C901
metadata_yamls=metadata_yamls,
strip_auth=strip_auth,
virtual_package_repo=virtual_package_repo,
mapping_url=mapping_url,
)

if not original_lock_content:
Expand Down Expand Up @@ -733,6 +736,7 @@ def _solve_for_arch(
virtual_package_repo: FakeRepoData,
update_spec: Optional[UpdateSpecification] = None,
strip_auth: bool = False,
mapping_url: str,
) -> List[LockedDependency]:
"""
Solve specification for a single platform
Expand All @@ -758,13 +762,14 @@ def _solve_for_arch(
update=update_spec.update,
platform=platform,
channels=channels,
mapping_url=mapping_url,
)

if requested_deps_by_name["pip"]:
if "python" not in conda_deps:
raise ValueError("Got pip specs without Python")
pip_deps = solve_pypi(
requested_deps_by_name["pip"],
pip_specs=requested_deps_by_name["pip"],
use_latest=update_spec.update,
pip_locked={
dep.name: dep for dep in update_spec.locked if dep.manager == "pip"
Expand All @@ -782,6 +787,7 @@ def _solve_for_arch(
pip_repositories=pip_repositories,
allow_pypi_requests=spec.allow_pypi_requests,
strip_auth=strip_auth,
mapping_url=mapping_url,
)
else:
pip_deps = {}
Expand Down Expand Up @@ -828,6 +834,7 @@ def create_lockfile_from_spec(
metadata_yamls: Sequence[pathlib.Path] = (),
strip_auth: bool = False,
virtual_package_repo: FakeRepoData,
mapping_url: str,
) -> Lockfile:
"""
Solve or update specification
Expand All @@ -847,6 +854,7 @@ def create_lockfile_from_spec(
virtual_package_repo=virtual_package_repo,
update_spec=update_spec,
strip_auth=strip_auth,
mapping_url=mapping_url,
)

for dep in deps:
Expand Down Expand Up @@ -1132,6 +1140,7 @@ def run_lock(
metadata_choices: AbstractSet[MetadataOption] = frozenset(),
metadata_yamls: Sequence[pathlib.Path] = (),
strip_auth: bool = False,
mapping_url: str,
) -> None:
if len(environment_files) == 0:
environment_files = handle_no_specified_source_files(lockfile_path)
Expand All @@ -1158,6 +1167,7 @@ def run_lock(
metadata_choices=metadata_choices,
metadata_yamls=metadata_yamls,
strip_auth=strip_auth,
mapping_url=mapping_url,
)


Expand Down Expand Up @@ -1365,8 +1375,11 @@ def lock(
logging.basicConfig(level=log_level)

# Set Pypi <--> Conda lookup file location
if pypi_to_conda_lookup_file:
set_lookup_location(pypi_to_conda_lookup_file)
mapping_url = (
DEFAULT_MAPPING_URL
if pypi_to_conda_lookup_file is None
else pypi_to_conda_lookup_file
)

metadata_enum_choices = set(MetadataOption(md) for md in metadata_choices)

Expand Down Expand Up @@ -1408,6 +1421,7 @@ def lock(
metadata_choices=metadata_enum_choices,
metadata_yamls=[pathlib.Path(path) for path in metadata_yamls],
strip_auth=strip_auth,
mapping_url=mapping_url,
)
if strip_auth:
with tempfile.TemporaryDirectory() as tempdir:
Expand Down
2 changes: 2 additions & 0 deletions conda_lock/conda_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def solve_conda(
update: List[str],
platform: str,
channels: List[Channel],
mapping_url: str,
) -> Dict[str, LockedDependency]:
"""
Solve (or update a previous solution of) conda specs for the given platform
Expand Down Expand Up @@ -205,6 +206,7 @@ def normalize_url(url: str) -> str:
apply_categories(
requested={k: v for k, v in specs.items() if v.manager == "conda"},
planned=planned,
mapping_url=mapping_url,
)

return planned
Expand Down
13 changes: 9 additions & 4 deletions conda_lock/lockfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,12 @@ def _truncate_main_category(


def apply_categories(
*,
requested: Dict[str, Dependency],
planned: Mapping[str, Union[List[LockedDependency], LockedDependency]],
categories: Sequence[str] = ("main", "dev"),
convert_to_pip_names: bool = False,
mapping_url: str,
) -> None:
"""map each package onto the root request the with the highest-priority category"""

Expand Down Expand Up @@ -98,14 +100,15 @@ def extract_planned_items(
return [
item
for item in planned_items
if dep_name(item.manager, item.name) not in deps
if dep_name(manager=item.manager, dep=item.name, mapping_url=mapping_url)
not in deps
]

def dep_name(manager: str, dep: str) -> str:
def dep_name(*, manager: str, dep: str, mapping_url: str) -> str:
# If we operate on lists of pip names and this is a conda dependency, we
# convert the name to a pip name.
if convert_to_pip_names and manager == "conda":
return conda_name_to_pypi_name(dep)
return conda_name_to_pypi_name(dep, mapping_url=mapping_url)
return dep

for name, request in requested.items():
Expand All @@ -123,7 +126,9 @@ def dep_name(manager: str, dep: str) -> str:

for planned_item in planned_items:
todo.extend(
dep_name(planned_item.manager, dep)
dep_name(
manager=planned_item.manager, dep=dep, mapping_url=mapping_url
)
for dep in planned_item.dependencies
# exclude virtual packages
if not (dep in deps or dep.startswith("__"))
Expand Down
162 changes: 74 additions & 88 deletions conda_lock/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import time

from functools import cached_property
from functools import lru_cache
from pathlib import Path
from typing import Dict

Expand All @@ -11,12 +11,15 @@

from filelock import FileLock, Timeout
from packaging.utils import NormalizedName, canonicalize_name
from packaging.utils import canonicalize_name as canonicalize_pypi_name
from platformdirs import user_cache_path
from typing_extensions import TypedDict


logger = logging.getLogger(__name__)

DEFAULT_MAPPING_URL = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"


class MappingEntry(TypedDict):
conda_name: str
Expand All @@ -25,90 +28,74 @@ class MappingEntry(TypedDict):
pypi_name: NormalizedName


class _LookupLoader:
_mapping_url: str = "https://raw.githubusercontent.com/regro/cf-graph-countyfair/master/mappings/pypi/grayskull_pypi_mapping.yaml"

@property
def mapping_url(self) -> str:
return self._mapping_url

@mapping_url.setter
def mapping_url(self, value: str) -> None:
if self._mapping_url != value:
self._mapping_url = value
# Invalidate cache
try:
del self.pypi_lookup
except AttributeError:
pass
try:
del self.conda_lookup
except AttributeError:
pass

@cached_property
def pypi_lookup(self) -> Dict[NormalizedName, MappingEntry]:
url = self.mapping_url
if url.startswith("http://") or url.startswith("https://"):
content = cached_download_file(url)
@lru_cache(maxsize=None)
def _get_pypi_lookup(mapping_url: str) -> Dict[NormalizedName, MappingEntry]:
    """Load the PyPI -> conda name mapping from ``mapping_url``.

    ``mapping_url`` may be an ``http(s)://`` URL (fetched via the on-disk
    cache in ``cached_download_file``), a ``file://`` URL, or a bare
    filesystem path. The parsed mapping is memoized per URL via
    ``lru_cache``, replacing the old ``_LookupLoader`` singleton.

    Returns a dict keyed by canonicalized PyPI name; each entry's
    ``pypi_name`` field is also canonicalized.
    """
    url = mapping_url
    if url.startswith("http://") or url.startswith("https://"):
        content = cached_download_file(url)
    else:
        # Accept both file:// URLs and plain paths.
        if url.startswith("file://"):
            path = url[len("file://") :]
        else:
            path = url
        content = Path(path).read_bytes()
    logger.debug("Parsing PyPI mapping")
    load_start = time.monotonic()
    yaml = ruamel.yaml.YAML(typ="safe")
    lookup = yaml.load(content)
    load_duration = time.monotonic() - load_start
    logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s")
    # lowercase and kebabcase the pypi names
    assert lookup is not None
    lookup = {canonicalize_name(k): v for k, v in lookup.items()}
    for v in lookup.values():
        v["pypi_name"] = canonicalize_name(v["pypi_name"])
    return lookup


def pypi_name_to_conda_name(name: str, mapping_url: str) -> str:
    """Convert a PyPI package name to a conda package name.

    Names that have no entry in the mapping (or an entry without a conda
    name) are assumed to be identical on conda.

    >>> from conda_lock.lookup import DEFAULT_MAPPING_URL
    >>> pypi_name_to_conda_name("build", mapping_url=DEFAULT_MAPPING_URL)
    'python-build'

    >>> pypi_name_to_conda_name("zpfqzvrj", mapping_url=DEFAULT_MAPPING_URL)
    'zpfqzvrj'
    """
    cname = canonicalize_pypi_name(name)
    # Single memoized lookup instead of an `in` test followed by indexing.
    entry = _get_pypi_lookup(mapping_url).get(cname)
    if entry is None:
        # Not in the mapping at all: assume the name is the same on conda.
        return cname
    res = entry.get("conda_name") or entry.get("conda_forge")
    if res is not None:
        return res
    # Entry exists but carries no conda name; fall back to identity.
    # Use the module logger (not the root logger) for consistency.
    logger.warning(f"Could not find conda name for {cname}. Assuming identity.")
    return cname


@lru_cache(maxsize=None)
def _get_conda_lookup(mapping_url: str) -> Dict[str, MappingEntry]:
    """Reverse the grayskull mapping to look up entries by conda name.

    Built from the forward (PyPI-keyed) lookup and memoized per mapping
    URL, mirroring the caching of ``_get_pypi_lookup``.
    """
    return {
        record["conda_name"]: record
        for record in _get_pypi_lookup(mapping_url).values()
    }


def conda_name_to_pypi_name(name: str, mapping_url: str) -> NormalizedName:
    """Return the PyPI name for a conda package.

    Falls back to the canonicalized conda name when no mapping entry
    exists for it.
    """
    lookup = _get_conda_lookup(mapping_url=mapping_url)
    cname = canonicalize_name(name)
    # `lookup` entries always carry a canonicalized `pypi_name`; the
    # default preserves identity for unmapped packages.
    return lookup.get(cname, {"pypi_name": cname})["pypi_name"]


def cached_download_file(url: str) -> bytes:
"""Download a file and cache it in the user cache directory.

Expand Down Expand Up @@ -138,26 +125,25 @@ def cached_download_file(url: str) -> bytes:
destination_etag = destination_mapping.with_suffix(".etag")
destination_lock = destination_mapping.with_suffix(".lock")

# Return the contents immediately if the file is fresh
try:
mtime = destination_mapping.stat().st_mtime
age = current_time - mtime
if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
contents = destination_mapping.read_bytes()
logger.debug(
f"Using cached mapping {destination_mapping} without "
f"checking for updates"
)
return contents
except FileNotFoundError:
pass

# Wait for any other process to finish downloading the file.
# Use the ETag to avoid downloading the file if it hasn't changed.
# Otherwise, download the file and cache the contents and ETag.
while True:
try:
with FileLock(destination_lock, timeout=5):
# Return the contents immediately if the file is fresh
try:
mtime = destination_mapping.stat().st_mtime
age = current_time - mtime
if age < DONT_CHECK_IF_NEWER_THAN_SECONDS:
contents = destination_mapping.read_bytes()
logger.debug(
f"Using cached mapping {destination_mapping} without "
f"checking for updates"
)
return contents
except FileNotFoundError:
pass
# Get the ETag from the last download, if it exists
if destination_mapping.exists() and destination_etag.exists():
logger.debug(f"Old ETag found at {destination_etag}")
Expand Down
Loading
Loading