From 12c5206a7a532e01e10f5ab4dbdbf48e6708c41e Mon Sep 17 00:00:00 2001 From: Dos Moonen Date: Thu, 15 Dec 2022 15:29:31 +0100 Subject: [PATCH] Refactor `_get_index_url()` to get integration tests working again and add tests to make sure it stays that way. Keyring support via the 'subprocess' provider can only retrieve a password, not a username-password combo. The username therefor MUST come from the URL. If the URL obtained from the index does not contain a username then the username from a matching index is used. `_get_index_url()` does that matching. The problem this refactoring solves is that the URL where a wheel or sdist can be downloaded from does not always start with the index url. Azure DevOps Artifacts Feeds are an example since it replaces the friendly name of the Feed with the GUID of the Feed. Causing `url.startswith(prefix)` to evaluate as `False`. The new behaviour is to return the index which matches the netloc and has the longest common prefix of the `path` property of the value returned by `urllib.parse.urlsplit()`. The behaviour for resolving ties is unspecified. --- src/pip/_internal/network/auth.py | 43 ++++++++++++++++--- tests/unit/test_network_auth.py | 71 ++++++++++++++++++++++++++----- 2 files changed, 98 insertions(+), 16 deletions(-) diff --git a/src/pip/_internal/network/auth.py b/src/pip/_internal/network/auth.py index e1ad7bff9ca..8e54b63bea2 100644 --- a/src/pip/_internal/network/auth.py +++ b/src/pip/_internal/network/auth.py @@ -13,6 +13,7 @@ import urllib.parse from abc import ABC, abstractmethod from functools import lru_cache +from os.path import commonprefix from pathlib import Path from typing import Any, Dict, List, NamedTuple, Optional, Tuple @@ -282,11 +283,43 @@ def _get_index_url(self, url: str) -> Optional[str]: if not url or not self.index_urls: return None - for u in self.index_urls: - prefix = remove_auth_from_url(u).rstrip("/") + "/" - if url.startswith(prefix): - return u - return None + url = remove_auth_from_url(url).rstrip("/") + "/" + parsed_url = urllib.parse.urlsplit(url) + + candidates_netloc_match = [] + candidates_netloc_path_match = [] + + for index in self.index_urls: + index = index.rstrip("/") + "/" + parsed_index = urllib.parse.urlsplit(remove_auth_from_url(index)) + if parsed_url == parsed_index: + return index + + if parsed_url.netloc != parsed_index.netloc: + continue + + candidate = urllib.parse.urlsplit(index) + candidates_netloc_match.append(candidate) + + if os.path.commonprefix([parsed_url.path, parsed_index.path]).rfind("/"): + candidates_netloc_path_match.append(candidate) + + candidates = candidates_netloc_path_match or candidates_netloc_match + + if not candidates: + return None + + candidates.sort( + reverse=True, + key=lambda candidate: commonprefix( + [ + parsed_url.path, + candidate.path, + ] + ).rfind("/"), + ) + + return urllib.parse.urlunsplit(candidates[0]) def _get_new_credentials( self, diff --git a/tests/unit/test_network_auth.py b/tests/unit/test_network_auth.py index 3e9d9a79c90..16901ea824b 100644 --- a/tests/unit/test_network_auth.py +++ b/tests/unit/test_network_auth.py @@ -102,7 +102,12 @@ def test_get_credentials_uses_cached_credentials_only_username() -> None: def test_get_index_url_credentials() -> None: - auth = MultiDomainBasicAuth(index_urls=["http://foo:bar@example.com/path"]) + auth = MultiDomainBasicAuth( + index_urls=[ + "http://example.com/", + "http://foo:bar@example.com/path", + ] + ) get = functools.partial( auth._get_new_credentials, allow_netrc=False, allow_keyring=False ) @@ -112,6 +117,45 @@ def test_get_index_url_credentials() -> None: assert get("http://example.com/path3/path2") == (None, None) +def test_prioritize_longest_path_prefix_match_organization() -> None: + auth = MultiDomainBasicAuth( + index_urls=[ + "http://foo:bar@example.com/org-name-alpha/repo-alias/simple", + "http://bar:foo@example.com/org-name-beta/repo-alias/simple", + ] + ) + get = functools.partial( + auth._get_new_credentials, allow_netrc=False, allow_keyring=False + ) + + # Inspired by Azure DevOps URL structure, GitLab should look similar + assert get("http://example.com/org-name-alpha/repo-guid/dowbload/") == ( + "foo", + "bar", + ) + assert get("http://example.com/org-name-beta/repo-guid/dowbload/") == ("bar", "foo") + + +def test_prioritize_longest_path_prefix_match_project() -> None: + auth = MultiDomainBasicAuth( + index_urls=[ + "http://foo:bar@example.com/org-alpha/project-name-alpha/repo-alias/simple", + "http://bar:foo@example.com/org-alpha/project-name-beta/repo-alias/simple", + ] + ) + get = functools.partial( + auth._get_new_credentials, allow_netrc=False, allow_keyring=False + ) + + # Inspired by Azure DevOps URL structure, GitLab should look similar + assert get( + "http://example.com/org-alpha/project-name-alpha/repo-guid/dowbload/" + ) == ("foo", "bar") + assert get( + "http://example.com/org-alpha/project-name-beta/repo-guid/dowbload/" + ) == ("bar", "foo") + + class KeyringModuleV1: """Represents the supported API of keyring before get_credential was added. @@ -123,7 +167,7 @@ def __init__(self) -> None: def get_password(self, system: str, username: str) -> Optional[str]: if system == "example.com" and username: return username + "!netloc" - if system == "http://example.com/path2" and username: + if system == "http://example.com/path2/" and username: return username + "!url" return None @@ -136,8 +180,8 @@ def set_password(self, system: str, username: str, password: str) -> None: ( ("http://example.com/path1", (None, None)), # path1 URLs will be resolved by netloc - ("http://user@example.com/path1", ("user", "user!netloc")), - ("http://user2@example.com/path1", ("user2", "user2!netloc")), + ("http://user@example.com/path3", ("user", "user!netloc")), + ("http://user2@example.com/path3", ("user2", "user2!netloc")), # path2 URLs will be resolved by index URL ("http://example.com/path2/path3", (None, None)), ("http://foo@example.com/path2/path3", ("foo", "foo!url")), @@ -151,7 +195,8 @@ def test_keyring_get_password( keyring = KeyringModuleV1() monkeypatch.setitem(sys.modules, "keyring", keyring) # type: ignore[misc] auth = MultiDomainBasicAuth( - index_urls=["http://example.com/path2"], keyring_provider="import" + index_urls=["http://example.com/path2", "http://example.com/path3"], + keyring_provider="import", ) actual = auth._get_new_credentials(url, allow_netrc=False, allow_keyring=True) @@ -199,7 +244,8 @@ def test_keyring_get_password_username_in_index( keyring = KeyringModuleV1() monkeypatch.setitem(sys.modules, "keyring", keyring) # type: ignore[misc] auth = MultiDomainBasicAuth( - index_urls=["http://user@example.com/path2"], keyring_provider="import" + index_urls=["http://user@example.com/path2", "http://example.com/path4"], + keyring_provider="import", ) get = functools.partial( auth._get_new_credentials, allow_netrc=False, allow_keyring=True @@ -290,7 +336,7 @@ def get_password(self, system: str, username: str) -> None: assert False, "get_password should not ever be called" def get_credential(self, system: str, username: str) -> Optional[Credential]: - if system == "http://example.com/path2": + if system == "http://example.com/path2/": return self.Credential("username", "url") if system == "example.com": return self.Credential("username", "netloc") @@ -310,7 +356,8 @@ def test_keyring_get_credential( ) -> None: monkeypatch.setitem(sys.modules, "keyring", KeyringModuleV2()) # type: ignore[misc] auth = MultiDomainBasicAuth( - index_urls=["http://example.com/path2"], keyring_provider="import" + index_urls=["http://example.com/path1", "http://example.com/path2"], + keyring_provider="import", ) assert ( @@ -375,6 +422,7 @@ def __call__( self.returncode = 1 else: # Passwords are returned encoded with a newline appended + self.returncode = 0 self.stdout = (password + os.linesep).encode("utf-8") if cmd[1] == "set": @@ -400,8 +448,8 @@ def check_returncode(self) -> None: ( ("http://example.com/path1", (None, None)), # path1 URLs will be resolved by netloc - ("http://user@example.com/path1", ("user", "user!netloc")), - ("http://user2@example.com/path1", ("user2", "user2!netloc")), + ("http://user@example.com/path3", ("user", "user!netloc")), + ("http://user2@example.com/path3", ("user2", "user2!netloc")), # path2 URLs will be resolved by index URL ("http://example.com/path2/path3", (None, None)), ("http://foo@example.com/path2/path3", ("foo", "foo!url")), @@ -417,7 +465,8 @@ def test_keyring_cli_get_password( pip._internal.network.auth.subprocess, "run", KeyringSubprocessResult() ) auth = MultiDomainBasicAuth( - index_urls=["http://example.com/path2"], keyring_provider="subprocess" + index_urls=["http://example.com/path2", "http://example.com/path3"], + keyring_provider="subprocess", ) actual = auth._get_new_credentials(url, allow_netrc=False, allow_keyring=True)