# Reconstructed from the patch "repo/legacy: add support for simple search"
# (commit 2fc94bf2a0dd673c589aea185ceea87920136737, Arun Babu Neelicattu,
# Fri, 8 Mar 2024 18:18:46 +0100; Resolves: #2446).
#
# Files touched by the patch:
#   src/poetry/repositories/legacy_repository.py
#   src/poetry/repositories/link_sources/html.py
#   tests/repositories/conftest.py
#   tests/repositories/test_repository_pool.py
#
# Only the code the patch adds is shown below, grouped by target file.


# --- src/poetry/repositories/legacy_repository.py ---------------------------
# Imports added by the patch:
#   from contextlib import suppress
#   from functools import cached_property
#   from poetry.repositories.link_sources.html import SimpleRepositoryRootPage


class LegacyRepository(HTTPRepository):  # excerpt: only the members added here
    @cached_property
    def root_page(self) -> SimpleRepositoryRootPage:
        """
        Fetch and parse the repository's root ("/") page.

        The result is cached on the instance by ``cached_property``, so the
        listing is requested at most once per repository object. Note that
        the empty fallback page is cached as well.
        """
        response = self._get_response("/")

        if not response:
            self._log(
                f"Unable to retrieve package listing from package source {self.name}",
                level="error",
            )
            # Fall back to an empty listing so that callers can still search.
            return SimpleRepositoryRootPage()

        return SimpleRepositoryRootPage(response.text)

    def search(self, query: str) -> list[Package]:
        """
        Return the packages of every project on the root page matching *query*.

        Candidates whose page lookup raises ``PackageNotFound`` are skipped.
        """
        results: list[Package] = []

        for candidate in self.root_page.search(query):
            # A project may be listed on the index without a retrievable page.
            with suppress(PackageNotFound):
                results.extend(self.get_page(candidate).packages)

        return results


# --- src/poetry/repositories/link_sources/html.py ---------------------------


class SimpleRepositoryRootPage:
    """
    This class represents the parsed content of a "simple" repository's root
    page. This follows the specification laid out in PEP 503.

    See: https://peps.python.org/pep-0503/
    """

    def __init__(self, content: str | None = None) -> None:
        # Missing content is parsed as an empty document, i.e. no anchors.
        parser = HTMLPageParser()
        parser.feed(content or "")
        self._parsed = parser.anchors

    @staticmethod
    def _project_name(href: str) -> str:
        """
        Return the last path segment of *href*.

        Anchor hrefs are URLs, so they may be relative ("demo/"),
        path-prefixed ("/simple/demo/") or absolute; all of these yield
        "demo". Any query string or fragment is ignored.
        """
        path = href.split("#", 1)[0].split("?", 1)[0]
        return path.rstrip("/").rpartition("/")[2]

    def search(self, query: str) -> list[str]:
        """Return the listed project names that contain *query*."""
        return [name for name in self.package_names if query in name]

    @cached_property
    def package_names(self) -> list[str]:
        """Return the project name of every anchor that has a usable href."""
        names: list[str] = []

        for anchor in self._parsed:
            href = anchor.get("href")
            if href and (name := self._project_name(href)):
                names.append(name)

        return names


# --- tests/repositories/conftest.py ------------------------------------------
# Imports added by the patch:
#   import re
#   from urllib.parse import urlparse
#   (under TYPE_CHECKING) from httpretty import httpretty
#   (under TYPE_CHECKING) from httpretty.core import HTTPrettyRequest
# The fixtures added to this file follow.
@pytest.fixture(scope="session")
def legacy_repository_directory() -> Path:
    """Directory holding the per-package HTML fixtures of the legacy index."""
    return Path(__file__).parent / "fixtures" / "legacy"


@pytest.fixture(scope="session")
def legacy_repository_package_names(legacy_repository_directory: Path) -> set[str]:
    """Names of all packages that have an ``<name>.html`` fixture file."""
    return {
        package_html_file.stem
        for package_html_file in legacy_repository_directory.glob("*.html")
    }


@pytest.fixture(scope="session")
def legacy_repository_index_html(
    legacy_repository_directory: Path, legacy_repository_package_names: set[str]
) -> str:
    """
    Root page of the mocked legacy repository: one anchor per fixture package.
    """
    # NOTE(review): the anchor/page markup was lost when this patch was
    # extracted; it is reconstructed here in PEP 503 form - confirm against
    # the original commit.
    # Sorted so that the generated page does not depend on set ordering.
    hrefs = [
        f'<a href="{name}/">{name}</a><br>'
        for name in sorted(legacy_repository_package_names)
    ]

    return f"""<!DOCTYPE html>
    <html>
      <head>
        <title>Legacy Repository</title>
      </head>
      <body>
        {"".join(hrefs)}
      </body>
    </html>
    """


@pytest.fixture(scope="session")
def legacy_repository_url() -> str:
    """Base URL under which the mocked legacy repository is served."""
    return "https://legacy.foo.bar"


@pytest.fixture
def mock_http_legacy_repository(
    http: type[httpretty],
    legacy_repository_url: str,
    legacy_repository_directory: Path,
    legacy_repository_index_html: str,
) -> None:
    """
    Register HTTP mocks for the legacy repository and its distribution files.

    Requests below ``legacy_repository_url`` are answered from the HTML
    fixtures (the generated index for the root path), and requests to
    files.pythonhosted.org are answered from the "pypi.org/dists" fixtures.
    Unknown names yield a 404.
    """

    def file_callback(
        request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
    ) -> list[int | dict[str, Any] | bytes]:
        # Only the file name matters; the URL's directory part is ignored.
        name = Path(urlparse(uri).path).name
        fixture = legacy_repository_directory.parent / "pypi.org" / "dists" / name

        if not fixture.exists():
            return [404, headers, b"Not Found"]

        return [200, headers, fixture.read_bytes()]

    http.register_uri(
        http.GET,
        # Escaped so that the dots of the host name only match literal dots.
        re.compile(rf"^{re.escape('https://files.pythonhosted.org/')}.*$"),
        body=file_callback,
    )

    def html_callback(
        request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
    ) -> list[int | dict[str, Any] | bytes]:
        url_path = urlparse(uri).path

        # A non-empty path addresses a single package page.
        if name := url_path.strip("/"):
            fixture = legacy_repository_directory / f"{name}.html"

            if not fixture.exists():
                return [404, headers, b"Not Found"]

            return [200, headers, fixture.read_bytes()]

        # The bare root path serves the generated index.
        return [200, headers, legacy_repository_index_html.encode("utf-8")]

    http.register_uri(
        http.GET,
        # The URL is escaped because it is data, not a pattern.
        re.compile(f"^{re.escape(legacy_repository_url)}/?(.*)?$"),
        body=html_callback,
    )


# --- tests/repositories/test_repository_pool.py ------------------------------
# The patch removes test_search_legacy_repositories_are_skipped (which built
# repo2 as LegacyRepository("repo2", "https://fake.repo/")) and replaces it
# with the test below.
def test_search_legacy_repositories_are_not_skipped(
    legacy_repository_url: str, mock_http_legacy_repository: None
) -> None:
    """A pool search must include the results of its legacy repositories."""
    foo = get_package("foo", "1.0.0")
    demo = get_package("demo", "0.1.0")

    plain_repo = Repository("repo1", [foo])
    legacy_repo = LegacyRepository("repo2", legacy_repository_url)
    pool = RepositoryPool([plain_repo, legacy_repo])

    # "foo" is only known to the in-memory repository.
    assert pool.search("foo") == [foo]

    # "demo" is only provided by the (mocked) legacy repository.
    assert plain_repo.search("demo") == []
    expected_demo = [demo]
    assert legacy_repo.search("demo") == expected_demo
    assert pool.search("demo") == expected_demo