Skip to content

Commit

Permalink
Merge pull request #10079 from new-resolver-url-equivalent-no-hash
Browse files Browse the repository at this point in the history
Smarter (and looser) link equivalency logic
  • Loading branch information
uranusjr authored Jul 22, 2021
2 parents 5672827 + 2da77e9 commit 26778e9
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 16 deletions.
7 changes: 7 additions & 0 deletions news/10002.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
New resolver: Loosen URL comparison logic when checking for direct URL reference
equivalency. The logic includes the following notable characteristics:

* The authentication part of the URL is explicitly ignored.
* Most of the fragment part, including ``egg=``, is explicitly ignored. Only
``subdirectory=`` and hash values (e.g. ``sha256=``) are kept.
* The query part of the URL is parsed to allow ordering differences.
70 changes: 66 additions & 4 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import functools
import logging
import os
import posixpath
import re
import urllib.parse
from typing import TYPE_CHECKING, Optional, Tuple, Union
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union

from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
Expand All @@ -17,6 +19,11 @@
if TYPE_CHECKING:
from pip._internal.index.collector import HTMLPage

logger = logging.getLogger(__name__)


_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5")


class Link(KeyBasedCompareMixin):
"""Represents a parsed link from a Package Index's simple URL
Expand Down Expand Up @@ -159,7 +166,7 @@ def subdirectory_fragment(self) -> Optional[str]:
return match.group(1)

_hash_re = re.compile(
r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
r'({choices})=([a-f0-9]+)'.format(choices="|".join(_SUPPORTED_HASHES))
)

@property
Expand Down Expand Up @@ -218,6 +225,61 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash)


# TODO: Relax this comparison logic to ignore, for example, fragments.
class _CleanResult(NamedTuple):
"""Convert link for equivalency check.
This is used in the resolver to check whether two URL-specified requirements
likely point to the same distribution and can be considered equivalent. This
equivalency logic avoids comparing URLs literally, which can be too strict
(e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpecting to users.
Currently this does three things:
1. Drop the basic auth part. This is technically wrong since a server can
serve different content based on auth, but if it does that, it is even
impossible to guarantee two URLs without auth are equivalent, since
the user can input different auth information when prompted. So the
practical solution is to assume the auth doesn't affect the response.
2. Parse the query to avoid the ordering issue. Note that ordering under the
same key in the query are NOT cleaned; i.e. "a=1&a=2" and "a=2&a=1" are
still considered different.
3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
hash values, since it should have no impact the downloaded content. Note
that this drops the "egg=" part historically used to denote the requested
project (and extras), which is wrong in the strictest sense, but too many
people are supplying it inconsistently to cause superfluous resolution
conflicts, so we choose to also ignore them.
"""

parsed: urllib.parse.SplitResult
query: Dict[str, List[str]]
subdirectory: str
hashes: Dict[str, str]

@classmethod
def from_link(cls, link: Link) -> "_CleanResult":
parsed = link._parsed_url
netloc = parsed.netloc.rsplit("@", 1)[-1]
fragment = urllib.parse.parse_qs(parsed.fragment)
if "egg" in fragment:
logger.debug("Ignoring egg= fragment in %s", link)
try:
# If there are multiple subdirectory values, use the first one.
# This matches the behavior of Link.subdirectory_fragment.
subdirectory = fragment["subdirectory"][0]
except (IndexError, KeyError):
subdirectory = ""
# If there are multiple hash values under the same algorithm, use the
# first one. This matches the behavior of Link.hash_value.
hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
return cls(
parsed=parsed._replace(netloc=netloc, query="", fragment=""),
query=urllib.parse.parse_qs(parsed.query),
subdirectory=subdirectory,
hashes=hashes,
)


@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
return link1 == link2
return _CleanResult.from_link(link1) == _CleanResult.from_link(link2)
13 changes: 2 additions & 11 deletions tests/functional/test_install_reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,25 +423,16 @@ def test_constraints_constrain_to_local(script, data, resolver_variant):
assert 'Running setup.py install for singlemodule' in result.stdout


def test_constrained_to_url_install_same_url(script, data, resolver_variant):
def test_constrained_to_url_install_same_url(script, data):
to_install = data.src.joinpath("singlemodule")
constraints = path_to_url(to_install) + "#egg=singlemodule"
script.scratch_path.joinpath("constraints.txt").write_text(constraints)
result = script.pip(
'install', '--no-index', '-f', data.find_links, '-c',
script.scratch_path / 'constraints.txt', to_install,
allow_stderr_warning=True,
expect_error=(resolver_variant == "2020-resolver"),
)
if resolver_variant == "2020-resolver":
assert 'Cannot install singlemodule 0.0.1' in result.stderr, str(result)
assert (
'because these package versions have conflicting dependencies.'
in result.stderr
), str(result)
else:
assert ('Running setup.py install for singlemodule'
in result.stdout), str(result)
assert 'Running setup.py install for singlemodule' in result.stdout, str(result)


def test_double_install_spurious_hash_mismatch(
Expand Down
90 changes: 90 additions & 0 deletions tests/functional/test_new_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,96 @@ def test_new_resolver_avoids_incompatible_wheel_tags_in_constraint_url(
assert_not_installed(script, "dep")


@pytest.mark.parametrize(
"suffixes_equivalent, depend_suffix, request_suffix",
[
pytest.param(
True,
"#egg=foo",
"",
id="drop-depend-egg",
),
pytest.param(
True,
"",
"#egg=foo",
id="drop-request-egg",
),
pytest.param(
True,
"#subdirectory=bar&egg=foo",
"#subdirectory=bar&egg=bar",
id="drop-egg-only",
),
pytest.param(
True,
"#subdirectory=bar&egg=foo",
"#egg=foo&subdirectory=bar",
id="fragment-ordering",
),
pytest.param(
True,
"?a=1&b=2",
"?b=2&a=1",
id="query-opordering",
),
pytest.param(
False,
"#sha512=1234567890abcdef",
"#sha512=abcdef1234567890",
id="different-keys",
),
pytest.param(
False,
"#sha512=1234567890abcdef",
"#md5=1234567890abcdef",
id="different-values",
),
pytest.param(
False,
"#subdirectory=bar&egg=foo",
"#subdirectory=rex",
id="drop-egg-still-different",
),
],
)
def test_new_resolver_direct_url_equivalent(
tmp_path,
script,
suffixes_equivalent,
depend_suffix,
request_suffix,
):
pkga = create_basic_wheel_for_package(script, name="pkga", version="1")
pkgb = create_basic_wheel_for_package(
script,
name="pkgb",
version="1",
depends=[f"pkga@{path_to_url(pkga)}{depend_suffix}"],
)

# Make pkgb visible via --find-links, but not pkga.
find_links = tmp_path.joinpath("find_links")
find_links.mkdir()
with open(pkgb, "rb") as f:
find_links.joinpath(pkgb.name).write_bytes(f.read())

# Install pkgb from --find-links, and pkga directly but from a different
# URL suffix as specified in pkgb. This should work!
script.pip(
"install",
"--no-cache-dir", "--no-index",
"--find-links", str(find_links),
f"{path_to_url(pkga)}{request_suffix}", "pkgb",
expect_error=(not suffixes_equivalent),
)

if suffixes_equivalent:
assert_installed(script, pkga="1", pkgb="1")
else:
assert_not_installed(script, "pkga", "pkgb")


def test_new_resolver_direct_url_with_extras(tmp_path, script):
pkg1 = create_basic_wheel_for_package(script, name="pkg1", version="1")
pkg2 = create_basic_wheel_for_package(
Expand Down
55 changes: 54 additions & 1 deletion tests/unit/test_link.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from pip._internal.models.link import Link
from pip._internal.models.link import Link, links_equivalent
from pip._internal.utils.hashes import Hashes


Expand Down Expand Up @@ -138,3 +138,56 @@ def test_is_hash_allowed__none_hashes(self, hashes, expected):
def test_is_vcs(self, url, expected):
link = Link(url)
assert link.is_vcs is expected


@pytest.mark.parametrize(
"url1, url2",
[
pytest.param(
"https://example.com/foo#egg=foo",
"https://example.com/foo",
id="drop-egg",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#subdirectory=bar&egg=bar",
id="drop-egg-only",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#egg=foo&subdirectory=bar",
id="fragment-ordering",
),
pytest.param(
"https://example.com/foo?a=1&b=2",
"https://example.com/foo?b=2&a=1",
id="query-opordering",
),
],
)
def test_links_equivalent(url1, url2):
assert links_equivalent(Link(url1), Link(url2))


@pytest.mark.parametrize(
"url1, url2",
[
pytest.param(
"https://example.com/foo#sha512=1234567890abcdef",
"https://example.com/foo#sha512=abcdef1234567890",
id="different-keys",
),
pytest.param(
"https://example.com/foo#sha512=1234567890abcdef",
"https://example.com/foo#md5=1234567890abcdef",
id="different-values",
),
pytest.param(
"https://example.com/foo#subdirectory=bar&egg=foo",
"https://example.com/foo#subdirectory=rex",
id="drop-egg-still-different",
),
],
)
def test_links_equivalent_false(url1, url2):
assert not links_equivalent(Link(url1), Link(url2))

0 comments on commit 26778e9

Please sign in to comment.