Skip to content

Commit

Permalink
repo/legacy: add support for simple search
Browse files Browse the repository at this point in the history
Resolves: #2446
  • Loading branch information
abn committed Mar 8, 2024
1 parent 5c646c4 commit 2fc94bf
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 5 deletions.
28 changes: 28 additions & 0 deletions src/poetry/repositories/legacy_repository.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from contextlib import suppress
from functools import cached_property
from typing import TYPE_CHECKING
from typing import Any

Expand All @@ -11,6 +13,7 @@
from poetry.repositories.exceptions import PackageNotFound
from poetry.repositories.http_repository import HTTPRepository
from poetry.repositories.link_sources.html import SimpleRepositoryPage
from poetry.repositories.link_sources.html import SimpleRepositoryRootPage


if TYPE_CHECKING:
Expand Down Expand Up @@ -139,3 +142,28 @@ def _get_page(self, name: NormalizedName) -> SimpleRepositoryPage:
if not response:
raise PackageNotFound(f"Package [{name}] not found.")
return SimpleRepositoryPage(response.url, response.text)

@cached_property
def root_page(self) -> SimpleRepositoryRootPage:
    """
    Fetch the repository's root listing ("/") and wrap it in a
    SimpleRepositoryRootPage.

    The value is computed on first access and then cached on the instance
    (cached_property). When no response is obtained, an error is logged and a
    root page built without any content is returned instead of raising.
    """
    listing = self._get_response("/")

    if listing:
        return SimpleRepositoryRootPage(listing.text)

    # No usable response: report it and fall back to a content-less page.
    self._log(
        f"Unable to retrieve package listing from package source {self.name}",
        level="error",
    )
    return SimpleRepositoryRootPage()

def search(self, query: str) -> list[Package]:
    """
    Return the packages of every root-page entry that matches ``query``.

    Candidate names come from ``self.root_page.search(query)``; each candidate's
    page is fetched with ``self.get_page`` and its ``packages`` are collected.
    Candidates for which PackageNotFound is raised are silently skipped.
    """
    found: list[Package] = []

    for name in self.root_page.search(query):
        # A listed name may have no retrievable page; skip it and carry on.
        with suppress(PackageNotFound):
            found.extend(self.get_page(name).packages)

    return found
34 changes: 34 additions & 0 deletions src/poetry/repositories/link_sources/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,40 @@ def _link_cache(self) -> LinkCache:
return links


class SimpleRepositoryRootPage:
    """
    Parsed view of the root page of a "simple" repository, following the
    specification laid out in PEP 503.

    See: https://peps.python.org/pep-0503/
    """

    def __init__(self, content: str | None = None) -> None:
        # Missing content is fed to the parser as an empty string.
        html_parser = HTMLPageParser()
        html_parser.feed(content if content else "")
        self._parsed = html_parser.anchors

    def _hrefs(self) -> list[str]:
        """Return the non-empty ``href`` values of the parsed anchors, in order."""
        return [href for anchor in self._parsed if (href := anchor.get("href"))]

    def search(self, query: str) -> list[str]:
        """
        Return the hrefs containing ``query`` as a substring, with trailing
        slashes removed. The match is done on the raw href, before stripping.
        """
        return [href.rstrip("/") for href in self._hrefs() if query in href]

    @cached_property
    def package_names(self) -> list[str]:
        """Return every non-empty anchor href with trailing slashes removed (cached)."""
        return [href.rstrip("/") for href in self._hrefs()]


class SimpleRepositoryPage(HTMLPage):
def __init__(self, url: str, content: str) -> None:
if not url.endswith("/"):
Expand Down
90 changes: 90 additions & 0 deletions tests/repositories/conftest.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from __future__ import annotations

import posixpath
import re

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from urllib.parse import urlparse

import pytest
import requests


if TYPE_CHECKING:
from httpretty import httpretty
from httpretty.core import HTTPrettyRequest

from tests.types import HTMLPageGetter
from tests.types import RequestsSessionGet

Expand Down Expand Up @@ -57,3 +62,88 @@ def metadata_mock(url: str, **__: Any) -> requests.Response:
raise requests.HTTPError()

return metadata_mock


@pytest.fixture(scope="session")
def legacy_repository_directory() -> Path:
    """Path of the ``fixtures/legacy`` directory located next to this file."""
    here = Path(__file__).parent
    return here / "fixtures" / "legacy"


@pytest.fixture(scope="session")
def legacy_repository_package_names(legacy_repository_directory: Path) -> set[str]:
    """Stems of all ``*.html`` files found directly in the legacy fixture directory."""
    html_files = legacy_repository_directory.glob("*.html")
    return {html_file.stem for html_file in html_files}


@pytest.fixture(scope="session")
def legacy_repository_index_html(
    legacy_repository_directory: Path, legacy_repository_package_names: set[str]
) -> str:
    """
    Build the HTML of the repository index page: one ``<a href="NAME/">`` anchor
    per fixture package name, embedded in a fixed HTML skeleton.
    """
    anchors = "".join(
        f'<a href="{name}/">{name}</a><br>' for name in legacy_repository_package_names
    )

    return f"""<!DOCTYPE html>
<html>
<head>
Legacy Repository
</head>
<body>
{anchors}
</body>
</html>
<!--TIMESTAMP 1709913893-->
"""


@pytest.fixture(scope="session")
def legacy_repository_url() -> str:
    """Base URL used for the legacy repository in these tests."""
    base_url = "https://legacy.foo.bar"
    return base_url


@pytest.fixture
def mock_http_legacy_repository(
    http: type[httpretty],
    legacy_repository_url: str,
    legacy_repository_directory: Path,
    legacy_repository_index_html: str,
) -> None:
    """
    Register HTTP mocks that emulate a legacy ("simple") repository.

    Two GET handlers are registered on ``http``:

    * ``https://files.pythonhosted.org/...`` serves distribution files from the
      ``pypi.org/dists`` directory that sits beside the legacy fixture directory.
    * ``legacy_repository_url`` serves ``<name>.html`` from the legacy fixture
      directory for a non-empty path, and the generated index page for the root.

    Both handlers answer 404 when the requested fixture file does not exist.
    """

    def file_callback(
        request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
    ) -> list[int | dict[str, Any] | bytes]:
        # Only the final path component selects the distribution file.
        name = Path(urlparse(uri).path).name
        fixture = legacy_repository_directory.parent / "pypi.org" / "dists" / name

        if not fixture.exists():
            return [404, headers, b"Not Found"]

        return [200, headers, fixture.read_bytes()]

    http.register_uri(
        http.GET,
        # Dots are escaped so that only this exact host is intercepted.
        re.compile(r"^https://files\.pythonhosted\.org/.*$"),
        body=file_callback,
    )

    def html_callback(
        request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
    ) -> list[int | dict[str, Any] | bytes]:
        url_path = urlparse(uri).path

        # A non-empty path addresses a single package page ...
        if name := url_path.strip("/"):
            fixture = legacy_repository_directory / f"{name}.html"

            if not fixture.exists():
                return [404, headers, b"Not Found"]

            return [200, headers, fixture.read_bytes()]

        # ... while the bare root returns the generated index listing.
        return [200, headers, legacy_repository_index_html.encode("utf-8")]

    http.register_uri(
        http.GET,
        # re.escape keeps regex metacharacters in the URL (e.g. ".") literal.
        re.compile(rf"^{re.escape(legacy_repository_url)}/?(.*)?$"),
        body=html_callback,
    )
17 changes: 12 additions & 5 deletions tests/repositories/test_repository_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,17 @@ def test_search_no_legacy_repositories() -> None:
assert pool.search("nothing") == []


def test_search_legacy_repositories_are_skipped() -> None:
package = get_package("foo", "1.0.0")
repo1 = Repository("repo1", [package])
repo2 = LegacyRepository("repo2", "https://fake.repo/")
# Checks that searching a pool containing a LegacyRepository (backed by the
# mocked HTTP fixture) returns that repository's matches instead of skipping it.
def test_search_legacy_repositories_are_not_skipped(
legacy_repository_url: str, mock_http_legacy_repository: None
) -> None:
foo_package = get_package("foo", "1.0.0")
demo_package = get_package("demo", "0.1.0")

repo1 = Repository("repo1", [foo_package])
repo2 = LegacyRepository("repo2", legacy_repository_url)
pool = RepositoryPool([repo1, repo2])

# NOTE(review): the next line references `package`, which this function never
# defines; it looks like the removed side of the diff hunk — confirm.
assert pool.search("foo") == [package]
assert pool.search("foo") == [foo_package]

# repo1 has no "demo" package, so the pool's "demo" result comes from repo2.
assert repo1.search("demo") == []
assert repo2.search("demo") == pool.search("demo") == [demo_package]

0 comments on commit 2fc94bf

Please sign in to comment.