diff --git a/news/10002.feature.rst b/news/10002.feature.rst new file mode 100644 index 00000000000..c292f7ebe6d --- /dev/null +++ b/news/10002.feature.rst @@ -0,0 +1,6 @@ +New resolver: Loosen URL comparison logic when checking for direct URL reference +equivalency. The logic includes the following notable characteristics: + +* The authentication part of the URL is explicitly ignored. +* The ``egg=`` fragment is explicitly ignored. +* Query and fragment parts of the URL are hashed to allow ordering differences. diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index ebee3839598..850d7725c0d 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,8 +1,9 @@ +import logging import os import posixpath import re import urllib.parse -from typing import TYPE_CHECKING, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes @@ -17,6 +18,8 @@ if TYPE_CHECKING: from pip._internal.index.collector import HTMLPage +logger = logging.getLogger(__name__) + class Link(KeyBasedCompareMixin): """Represents a parsed link from a Package Index's simple URL @@ -242,7 +245,48 @@ def is_hash_allowed(self, hashes): return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) -# TODO: Relax this comparison logic to ignore, for example, fragments. +class _CleanResult(NamedTuple): + """Convert link for equivalency check. + + This is used in the resolver to check whether two URL-specified requirements + likely point to the same distribution and can be considered equivalent. This + equivalency logic avoids comparing URLs literally, which can be too strict + (e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users. + + Currently this does three things: + + 1. Drop the basic auth part. This is technically wrong since a server can + serve different content based on auth, but if it does that, it is even + impossible to guarantee two URLs without auth are equivalent, since + the user can input different auth information when prompted. So the + practical solution is to assume the auth doesn't affect the response. + 2. Parse the query to avoid the ordering issue. + 3. Parse the fragment, and explicitly drop the "egg=" part since it is + commonly provided as the project name for compatibility. This is wrong in + the strictest sense, but too many people are doing it. + + Note that query value ordering under the same key in query and fragment are + NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are still considered different. + """ + + parsed: urllib.parse.SplitResult + query: Dict[str, List[str]] + fragment: Dict[str, List[str]] + + @classmethod + def from_link(cls, link: Link) -> "_CleanResult": + parsed = link._parsed_url + netloc = parsed.netloc.rsplit("@", 1)[-1] + fragment = urllib.parse.parse_qs(parsed.fragment) + if fragment.pop("egg", None): + logger.debug("Ignoring egg= fragment in %s", link) + return _CleanResult( + parsed=parsed._replace(netloc=netloc, query="", fragment=""), + query=urllib.parse.parse_qs(parsed.query), + fragment=fragment, + ) + + def links_equivalent(link1, link2): # type: (Link, Link) -> bool - return link1 == link2 + return _CleanResult.from_link(link1) == _CleanResult.from_link(link2) diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py index d8de6248b7d..6df48927569 100644 --- a/tests/functional/test_install_reqs.py +++ b/tests/functional/test_install_reqs.py @@ -423,7 +423,7 @@ def test_constraints_constrain_to_local(script, data, resolver_variant): assert 'Running setup.py install for singlemodule' in result.stdout -def test_constrained_to_url_install_same_url(script, data, resolver_variant): +def test_constrained_to_url_install_same_url(script, data): to_install = data.src.joinpath("singlemodule") constraints = path_to_url(to_install) + "#egg=singlemodule" script.scratch_path.joinpath("constraints.txt").write_text(constraints) @@ -431,17 +431,8 @@ def test_constrained_to_url_install_same_url(script, data, resolver_variant): 'install', '--no-index', '-f', data.find_links, '-c', script.scratch_path / 'constraints.txt', to_install, allow_stderr_warning=True, - expect_error=(resolver_variant == "2020-resolver"), ) - if resolver_variant == "2020-resolver": - assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result) - assert ( - 'because these package versions have conflicting dependencies.' - in result.stderr - ), str(result) - else: - assert ('Running setup.py install for singlemodule' - in result.stdout), str(result) + assert 'Running setup.py install for singlemodule' in result.stdout, str(result) def test_double_install_spurious_hash_mismatch( diff --git a/tests/functional/test_new_resolver.py b/tests/functional/test_new_resolver.py index 68eb3b1c010..7b09d18b876 100644 --- a/tests/functional/test_new_resolver.py +++ b/tests/functional/test_new_resolver.py @@ -1791,6 +1791,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url( assert_not_installed(script, "dep") +@pytest.mark.parametrize( + "suffixes_equivalent, depend_suffix, request_suffix", + [ + pytest.param( + True, + "#egg=foo", + "", + id="drop-depend-egg", + ), + pytest.param( + True, + "", + "#egg=foo", + id="drop-request-egg", + ), + pytest.param( + True, + "#subdirectory=bar&egg=foo", + "#subdirectory=bar&egg=bar", + id="drop-egg-only", + ), + pytest.param( + True, + "#subdirectory=bar&egg=foo", + "#egg=foo&subdirectory=bar", + id="fragment-ordering", + ), + pytest.param( + True, + "?a=1&b=2", + "?b=2&a=1", + id="query-opordering", + ), + pytest.param( + False, + "#sha512=1234567890abcdef", + "#sha512=abcdef1234567890", + id="different-keys", + ), + pytest.param( + False, + "#sha512=1234567890abcdef", + "#md5=1234567890abcdef", + id="different-values", + ), + pytest.param( + False, + "#subdirectory=bar&egg=foo", + "#subdirectory=rex", + id="drop-egg-still-different", + ), + ], +) +def test_new_resolver_direct_url_equivalent( + tmp_path, + script, + suffixes_equivalent, + depend_suffix, + request_suffix, +): + pkga = create_basic_wheel_for_package(script, name="pkga", version="1") + pkgb = create_basic_wheel_for_package( + script, + name="pkgb", + version="1", + depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"], + ) + + # Make pkgb visible via --find-links, but not pkga. + find_links = tmp_path.joinpath("find_links") + find_links.mkdir() + with open(pkgb, "rb") as f: + find_links.joinpath(pkgb.name).write_bytes(f.read()) + + # Install pkgb from --find-links, and pkga directly but from a different + # URL suffix as specified in pkgb. This should work! + script.pip( + "install", + "--no-cache-dir", "--no-index", + "--find-links", str(find_links), + f"{path_to_url(pkga)}{request_suffix}", "pkgb", + expect_error=(not suffixes_equivalent), + ) + + if suffixes_equivalent: + assert_installed(script, pkga="1", pkgb="1") + else: + assert_not_installed(script, "pkga", "pkgb") + + def test_new_resolver_direct_url_with_extras(tmp_path, script): pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1") pkg2 = create_basic_wheel_for_package( diff --git a/tests/unit/test_link.py b/tests/unit/test_link.py index a9e75e38bd3..77f569333a4 100644 --- a/tests/unit/test_link.py +++ b/tests/unit/test_link.py @@ -1,6 +1,6 @@ import pytest -from pip._internal.models.link import Link +from pip._internal.models.link import Link, links_equivalent from pip._internal.utils.hashes import Hashes @@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected): def test_is_vcs(self, url, expected): link = Link(url) assert link.is_vcs is expected + + +@pytest.mark.parametrize( + "url1, url2", + [ + pytest.param( + "https://example.com/foo#egg=foo", + "https://example.com/foo", + id="drop-egg", + ), + pytest.param( + "https://example.com/foo#subdirectory=bar&egg=foo", + "https://example.com/foo#subdirectory=bar&egg=bar", + id="drop-egg-only", + ), + pytest.param( + "https://example.com/foo#subdirectory=bar&egg=foo", + "https://example.com/foo#egg=foo&subdirectory=bar", + id="fragment-ordering", + ), + pytest.param( + "https://example.com/foo?a=1&b=2", + "https://example.com/foo?b=2&a=1", + id="query-opordering", + ), + ], +) +def test_links_equivalent(url1, url2): + assert links_equivalent(Link(url1), Link(url2)) + + +@pytest.mark.parametrize( + "url1, url2", + [ + pytest.param( + "https://example.com/foo#sha512=1234567890abcdef", + "https://example.com/foo#sha512=abcdef1234567890", + id="different-keys", + ), + pytest.param( + "https://example.com/foo#sha512=1234567890abcdef", + "https://example.com/foo#md5=1234567890abcdef", + id="different-values", + ), + pytest.param( + "https://example.com/foo#subdirectory=bar&egg=foo", + "https://example.com/foo#subdirectory=rex", + id="drop-egg-still-different", + ), + ], +) +def test_links_equivalent_false(url1, url2): + assert not links_equivalent(Link(url1), Link(url2))