diff --git a/docs/repositories.md b/docs/repositories.md
index 490d78bf785..481437a46d2 100644
--- a/docs/repositories.md
+++ b/docs/repositories.md
@@ -237,6 +237,20 @@ Note the trailing `/simple/`. This is important when configuring
{{% /note %}}
+Repositories following the [PEP 503](https://peps.python.org/pep-0503/)
+specification should expose a root page with an individual link for each
+package they serve. Because this isn't reliably implemented everywhere,
+Poetry does not rely on this listing by default and instead queries the
+repository for every package, which leads to increased network traffic and
+slower dependency resolution. If you're using a repository which has a valid
+listing, you can set the `indexed` property to let Poetry prefetch and cache
+this package list and skip lookups for packages the repository does not serve.
+
+```toml
+[[tool.poetry.source]]
+name = "foo"
+url = "https://foo.bar/simple/"
+indexed = true
+```
+
In addition to [PEP 503](https://peps.python.org/pep-0503/), Poetry can also handle simple API
repositories that implement [PEP 658](https://peps.python.org/pep-0658/) (*Introduced in 1.2.0*).
This is helpful in reducing dependency resolution time for packages from these sources as Poetry can
diff --git a/src/poetry/config/source.py b/src/poetry/config/source.py
index f3af0c589e2..c4836a73139 100644
--- a/src/poetry/config/source.py
+++ b/src/poetry/config/source.py
@@ -9,6 +9,7 @@ class Source:
url: str
default: bool = dataclasses.field(default=False)
secondary: bool = dataclasses.field(default=False)
+ indexed: bool = dataclasses.field(default=False)
def to_dict(self) -> dict[str, str | bool]:
return dataclasses.asdict(self)
diff --git a/src/poetry/factory.py b/src/poetry/factory.py
index f1ab8ec99fd..15c446f5528 100644
--- a/src/poetry/factory.py
+++ b/src/poetry/factory.py
@@ -174,6 +174,7 @@ def configure_sources(
def create_package_source(
cls, source: dict[str, str], auth_config: Config, disable_cache: bool = False
) -> LegacyRepository:
+ from poetry.repositories.indexed import IndexedLegacyRepository
from poetry.repositories.legacy_repository import LegacyRepository
from poetry.repositories.single_page_repository import SinglePageRepository
@@ -185,11 +186,18 @@ def create_package_source(
raise RuntimeError("Missing [name] in source.")
name = source["name"]
url = source["url"]
+ indexed = bool(source.get("indexed", False))
repository_class = LegacyRepository
if re.match(r".*\.(htm|html)$", url):
repository_class = SinglePageRepository
+ if indexed:
+ raise RuntimeError(
+ "cannot set indexed=True for a single-page repository"
+ )
+ elif indexed:
+ repository_class = IndexedLegacyRepository
return repository_class(
name,
diff --git a/src/poetry/repositories/indexed.py b/src/poetry/repositories/indexed.py
new file mode 100644
index 00000000000..0ec18bed37e
--- /dev/null
+++ b/src/poetry/repositories/indexed.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from poetry.repositories.exceptions import RepositoryError
+from poetry.repositories.legacy_repository import LegacyRepository
+from poetry.repositories.link_sources.html import SimpleIndexPage
+
+
+if TYPE_CHECKING:
+ from poetry.core.packages.dependency import Dependency
+ from poetry.core.packages.package import Package
+
+ from poetry.config.config import Config
+
+
+class IndexedLegacyRepository(LegacyRepository):
+    """Legacy repository that checks its root index page before each lookup."""
+
+    def __init__(
+        self,
+        name: str,
+        url: str,
+        config: Config | None = None,
+        disable_cache: bool = False,
+    ) -> None:
+        """Initialise the parent repository, then load the index page."""
+        super().__init__(name, url.rstrip("/"), config, disable_cache)
+        self._index_page = self._get_index_page()
+
+    def find_packages(self, dependency: Dependency) -> list[Package]:
+        """Return packages for ``dependency``, or ``[]`` if it is not indexed."""
+        served = self._index_page.serves_package(dependency.name)
+        return super().find_packages(dependency) if served else []
+
+    def _get_index_page(self) -> SimpleIndexPage:
+        """Fetch the root page, raising ``RepositoryError`` on a falsy response."""
+        response = self._get_response("")
+        if response:
+            return SimpleIndexPage(response.url, response.text)
+        raise RepositoryError(f"Failed fetching index page for repository {self.name}")
diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py
index c3c3cc4ce40..ab002444bc8 100644
--- a/src/poetry/repositories/link_sources/html.py
+++ b/src/poetry/repositories/link_sources/html.py
@@ -9,6 +9,7 @@
from poetry.core.packages.utils.link import Link
from poetry.repositories.link_sources.base import LinkSource
+from poetry.utils.helpers import canonicalize_name
if TYPE_CHECKING:
@@ -46,3 +47,34 @@ def __init__(self, url: str, content: str) -> None:
if not url.endswith("/"):
url += "/"
super().__init__(url=url, content=content)
+
+
+class SimpleIndexPage:
+    """Describes the root page of a PEP 503 compliant repository.
+
+    The page holds one anchor per served project; the anchor texts are parsed
+    once at construction and kept as a set of canonicalized project names.
+    """
+
+    def __init__(self, url: str, content: str) -> None:
+        """Parse ``content``, the HTML of the index page located at ``url``."""
+        # Store the URL with a trailing slash, adding one when it is missing.
+        self._url = url if url.endswith("/") else url + "/"
+        self._content = content
+        self._parsed = html5lib.parse(content, namespaceHTMLElements=False)
+        self._cached_packages = set(self.links)
+
+    @property
+    def links(self) -> Iterator[str]:
+        """Yield the canonicalized name of every project linked on the page."""
+        # Names are canonicalized so that lookups are case-insensitive and
+        # treat - and _ equivalently.
+        for anchor in self._parsed.findall(".//a"):
+            # Anchor text may carry layout whitespace; drop it, skip empty ones.
+            text = (anchor.text or "").strip()
+            if text:
+                yield canonicalize_name(text)
+
+    def serves_package(self, name: str) -> bool:
+        """Return whether a project called ``name`` is listed on the page."""
+        return canonicalize_name(name) in self._cached_packages
diff --git a/tests/repositories/fixtures/legacy/index.html b/tests/repositories/fixtures/legacy/index.html
new file mode 100644
index 00000000000..a66b1c6d759
--- /dev/null
+++ b/tests/repositories/fixtures/legacy/index.html
@@ -0,0 +1,3 @@
+<a href="pyyaml/">pyyaml</a>
+<a href="missing-version/">missing-version</a>
+<a href="black/">black</a>
diff --git a/tests/repositories/test_legacy_repository.py b/tests/repositories/test_legacy_repository.py
index bb0c33f6a76..f3b58750a0f 100644
--- a/tests/repositories/test_legacy_repository.py
+++ b/tests/repositories/test_legacy_repository.py
@@ -15,7 +15,9 @@
from poetry.factory import Factory
from poetry.repositories.exceptions import PackageNotFound
from poetry.repositories.exceptions import RepositoryError
+from poetry.repositories.indexed import IndexedLegacyRepository
from poetry.repositories.legacy_repository import LegacyRepository
+from poetry.repositories.link_sources.html import SimpleIndexPage
from poetry.repositories.link_sources.html import SimpleRepositoryPage
@@ -388,6 +390,64 @@ def test_get_package_retrieves_packages_with_no_hashes():
] == package.files
+class MockIndexedRepository(MockRepository, IndexedLegacyRepository):
+    """Indexed repository whose root page is read from the fixtures folder."""
+
+    def _get_index_page(self) -> SimpleIndexPage | None:
+        """Build the index page from ``index.html``; ``None`` if it is absent."""
+        index_file = self.FIXTURES / "index.html"
+        if index_file.exists():
+            content = index_file.read_text(encoding="utf-8")
+            return SimpleIndexPage(self._url + "/", content)
+        return None
+
+
+def test_indexed_root_page_has_valid_content():
+    """The index page built from the fixture lists ``pyyaml``."""
+    index_page = MockIndexedRepository()._index_page
+    assert index_page.serves_package("pyyaml")
+
+
+def test_indexed_fails_on_missing():
+    """A dependency absent from the index page yields no packages."""
+    indexed_repo = MockIndexedRepository()
+    dependency = Factory.create_dependency("this-doesnt-exist", "*")
+
+    assert indexed_repo.find_packages(dependency) == []
+
+
+def test_indexed_succeeds_on_existing():
+    """A dependency listed on the index page resolves to exactly one package."""
+    indexed_repo = MockIndexedRepository()
+    dependency = Factory.create_dependency("pyyaml", "*")
+
+    found = indexed_repo.find_packages(dependency)
+
+    assert len(found) == 1
+
+
+def test_indexed_pep426_underscore_hyphen():
+    """Underscores and hyphens are interchangeable in index lookups."""
+    index_page = MockIndexedRepository()._index_page
+
+    # The index lists this project as 'missing-version'.
+    assert index_page.serves_package("missing_version")
+
+
+def test_indexed_pep426_case_insensitive():
+    """Index lookups ignore the letter case of the requested name."""
+    index_page = MockIndexedRepository()._index_page
+
+    # The index lists this project as 'black'.
+    assert index_page.serves_package("Black")
+
+
+def test_indexed_retrieves_package_with_no_hashes():
+    """``package()`` on an indexed repository returns jupyter 1.0.0's file list."""
+    expected_files = [
+        {
+            "file": "jupyter-1.0.0.tar.gz",
+            "hash": "sha256:d9dc4b3318f310e34c82951ea5d6683f67bed7def4b259fafbfe4f1beb1d8e5f",  # noqa: E501
+        }
+    ]
+
+    package = MockIndexedRepository().package("jupyter", "1.0.0")
+
+    assert expected_files == package.files
+
+
class MockHttpRepository(LegacyRepository):
def __init__(
self, endpoint_responses: dict, http: type[httpretty.httpretty]