From 0c51d379e284de93773843e84984557c39d3008d Mon Sep 17 00:00:00 2001 From: Ben Mares Date: Tue, 10 Sep 2024 16:19:13 +0200 Subject: [PATCH 1/3] Parse pypi mapping with ruamel.yaml instead of PyYAML For me this reduces loading time from 5.44s to 1.63s. --- conda_lock/lookup.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/conda_lock/lookup.py b/conda_lock/lookup.py index 60fa33d7..6908c3e2 100644 --- a/conda_lock/lookup.py +++ b/conda_lock/lookup.py @@ -1,14 +1,20 @@ +import logging +import time + from functools import cached_property from pathlib import Path from typing import Dict import requests -import yaml +import ruamel.yaml from packaging.utils import NormalizedName, canonicalize_name from typing_extensions import TypedDict +logger = logging.getLogger(__name__) + + class MappingEntry(TypedDict): conda_name: str # legacy field, generally not used by anything anymore @@ -50,7 +56,12 @@ def pypi_lookup(self) -> Dict[NormalizedName, MappingEntry]: else: path = url content = Path(path).read_bytes() - lookup = yaml.safe_load(content) + logger.debug("Parsing PyPI mapping") + load_start = time.monotonic() + yaml = ruamel.yaml.YAML(typ="safe") + lookup = yaml.load(content) + load_duration = time.monotonic() - load_start + logger.debug(f"Loaded {len(lookup)} entries in {load_duration:.2f}s") # lowercase and kebabcase the pypi names assert lookup is not None lookup = {canonicalize_name(k): v for k, v in lookup.items()} From 800f6ff315757fe7a2d1069721bd63e999330d59 Mon Sep 17 00:00:00 2001 From: Ben Mares Date: Tue, 10 Sep 2024 16:35:44 +0200 Subject: [PATCH 2/3] Cache the PyPI mapping on disk to avoid redundant downloads Not really necessary since the slowdown is due to parsing the YAML, but I wrote it anyways, so let's include it. 
--- conda_lock/lookup.py | 94 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 91 insertions(+), 3 deletions(-) diff --git a/conda_lock/lookup.py b/conda_lock/lookup.py index 6908c3e2..b7be0626 100644 --- a/conda_lock/lookup.py +++ b/conda_lock/lookup.py @@ -1,3 +1,4 @@ +import hashlib import logging import time @@ -8,7 +9,9 @@ import requests import ruamel.yaml +from filelock import FileLock, Timeout from packaging.utils import NormalizedName, canonicalize_name +from platformdirs import user_cache_path from typing_extensions import TypedDict @@ -47,9 +50,7 @@ def mapping_url(self, value: str) -> None: def pypi_lookup(self) -> Dict[NormalizedName, MappingEntry]: url = self.mapping_url if url.startswith("http://") or url.startswith("https://"): - res = requests.get(self._mapping_url) - res.raise_for_status() - content = res.content + content = cached_download_file(url) else: if url.startswith("file://"): path = url[len("file://") :] @@ -106,3 +107,90 @@ def pypi_name_to_conda_name(name: str) -> str: """return the conda name for a pypi package""" cname = canonicalize_name(name) return get_forward_lookup().get(cname, {"conda_name": cname})["conda_name"] + + +def cached_download_file(url: str) -> bytes: + """Download a file and cache it in the user cache directory. + + If the file is already cached, return the cached contents. + If the file is not cached, download it and cache the contents + and the ETag. + + Protect against multiple processes downloading the same file. 
+ """ + CLEAR_CACHE_AFTER_SECONDS = 60 * 60 * 24 * 2 # 2 days + DONT_CHECK_IF_NEWER_THAN_SECONDS = 60 * 5 # 5 minutes + current_time = time.time() + cache = user_cache_path("conda-lock", appauthor=False) + cache.mkdir(parents=True, exist_ok=True) + + # clear out old cache files + for file in cache.iterdir(): + if file.name.startswith("pypi-mapping-"): + mtime = file.stat().st_mtime + age = current_time - mtime + if age < 0 or age > CLEAR_CACHE_AFTER_SECONDS: + logger.debug("Removing old cache file %s", file) + file.unlink() + + url_hash = hashlib.sha256(url.encode()).hexdigest()[:4] + destination_mapping = cache / f"pypi-mapping-{url_hash}.yaml" + destination_etag = destination_mapping.with_suffix(".etag") + destination_lock = destination_mapping.with_suffix(".lock") + + # Return the contents immediately if the file is fresh + try: + mtime = destination_mapping.stat().st_mtime + age = current_time - mtime + if age < DONT_CHECK_IF_NEWER_THAN_SECONDS: + contents = destination_mapping.read_bytes() + logger.debug( + f"Using cached mapping {destination_mapping} without " + f"checking for updates" + ) + return contents + except FileNotFoundError: + pass + + # Wait for any other process to finish downloading the file. + # Use the ETag to avoid downloading the file if it hasn't changed. + # Otherwise, download the file and cache the contents and ETag. + while True: + try: + with FileLock(destination_lock, timeout=5): + # Get the ETag from the last download, if it exists + if destination_mapping.exists() and destination_etag.exists(): + logger.debug(f"Old ETag found at {destination_etag}") + try: + old_etag = destination_etag.read_text().strip() + headers = {"If-None-Match": old_etag} + except FileNotFoundError: + logger.warning("Failed to read ETag") + headers = {} + else: + headers = {} + # Download the file and cache the result. 
+ logger.debug(f"Requesting {url}") + res = requests.get(url, headers=headers) + if res.status_code == 304: + logger.debug( + f"{url} has not changed since last download, " + f"using {destination_mapping}" + ) + else: + res.raise_for_status() + # Fresh download succeeded; persist the contents and the ETag. + destination_mapping.write_bytes(res.content) + if "ETag" in res.headers: + destination_etag.write_text(res.headers["ETag"]) + else: + logger.warning("No ETag in response headers") + logger.debug(f"Downloaded {url} to {destination_mapping}") + return destination_mapping.read_bytes() + + except Timeout: + logger.warning( + f"Failed to acquire lock on {destination_lock}, it is likely " + f"being downloaded by another process. Retrying...", + destination_lock, + ) From 016af6c89cdbc7beb601cea3c9a5a3639ca9e0fe Mon Sep 17 00:00:00 2001 From: Ben Mares Date: Wed, 11 Sep 2024 09:17:55 +0200 Subject: [PATCH 3/3] Fix broken string interpolation Co-authored-by: Marius van Niekerk --- conda_lock/lookup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda_lock/lookup.py b/conda_lock/lookup.py index b7be0626..f2a8801c 100644 --- a/conda_lock/lookup.py +++ b/conda_lock/lookup.py @@ -191,6 +191,5 @@ def cached_download_file(url: str) -> bytes: except Timeout: logger.warning( f"Failed to acquire lock on {destination_lock}, it is likely " - f"being downloaded by another process. Retrying...", - destination_lock, + f"being downloaded by another process. Retrying..." )