Skip to content

Commit

Permalink
Refactor _get_index_url() to get integration tests working again an…
Browse files Browse the repository at this point in the history
…d add tests to make sure it stays that way.

Keyring support via the 'subprocess' provider can only retrieve a password, not a username-password combo. The username therefor MUST come from the URL.
If the URL obtained from the index does not contain a username then the username from a matching index is used. `_get_index_url()` does that matching.

The problem this refactoring solves is that the URL where a wheel or sdist can be downloaded from does not always start with the index url. Azure DevOps Artifacts Feeds are an example since it replaces the friendly name of the Feed with the GUID of the Feed. Causing `url.startswith(prefix)` to evaluate as `False`.

The new behaviour is to return the index which matches the netloc and has the longest common prefix of the `path` property of the value returned by `urllib.parse.urlsplit()`. The behaviour for resolving ties is unspecified.
  • Loading branch information
Dos Moonen committed Jan 5, 2023
1 parent 9ece8e3 commit 12c5206
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 16 deletions.
43 changes: 38 additions & 5 deletions src/pip/_internal/network/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import urllib.parse
from abc import ABC, abstractmethod
from functools import lru_cache
from os.path import commonprefix
from pathlib import Path
from typing import Any, Dict, List, NamedTuple, Optional, Tuple

Expand Down Expand Up @@ -282,11 +283,43 @@ def _get_index_url(self, url: str) -> Optional[str]:
if not url or not self.index_urls:
return None

for u in self.index_urls:
prefix = remove_auth_from_url(u).rstrip("/") + "/"
if url.startswith(prefix):
return u
return None
url = remove_auth_from_url(url).rstrip("/") + "/"
parsed_url = urllib.parse.urlsplit(url)

candidates_netloc_match = []
candidates_netloc_path_match = []

for index in self.index_urls:
index = index.rstrip("/") + "/"
parsed_index = urllib.parse.urlsplit(remove_auth_from_url(index))
if parsed_url == parsed_index:
return index

if parsed_url.netloc != parsed_index.netloc:
continue

candidate = urllib.parse.urlsplit(index)
candidates_netloc_match.append(candidate)

if os.path.commonprefix([parsed_url.path, parsed_index.path]).rfind("/"):
candidates_netloc_path_match.append(candidate)

candidates = candidates_netloc_path_match or candidates_netloc_match

if not candidates:
return None

candidates.sort(
reverse=True,
key=lambda candidate: commonprefix(
[
parsed_url.path,
candidate.path,
]
).rfind("/"),
)

return urllib.parse.urlunsplit(candidates[0])

def _get_new_credentials(
self,
Expand Down
71 changes: 60 additions & 11 deletions tests/unit/test_network_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@ def test_get_credentials_uses_cached_credentials_only_username() -> None:


def test_get_index_url_credentials() -> None:
auth = MultiDomainBasicAuth(index_urls=["http://foo:bar@example.com/path"])
auth = MultiDomainBasicAuth(
index_urls=[
"http://example.com/",
"http://foo:bar@example.com/path",
]
)
get = functools.partial(
auth._get_new_credentials, allow_netrc=False, allow_keyring=False
)
Expand All @@ -112,6 +117,45 @@ def test_get_index_url_credentials() -> None:
assert get("http://example.com/path3/path2") == (None, None)


def test_prioritize_longest_path_prefix_match_organization() -> None:
auth = MultiDomainBasicAuth(
index_urls=[
"http://foo:bar@example.com/org-name-alpha/repo-alias/simple",
"http://bar:foo@example.com/org-name-beta/repo-alias/simple",
]
)
get = functools.partial(
auth._get_new_credentials, allow_netrc=False, allow_keyring=False
)

# Inspired by Azure DevOps URL structure, GitLab should look similar
assert get("http://example.com/org-name-alpha/repo-guid/dowbload/") == (
"foo",
"bar",
)
assert get("http://example.com/org-name-beta/repo-guid/dowbload/") == ("bar", "foo")


def test_prioritize_longest_path_prefix_match_project() -> None:
auth = MultiDomainBasicAuth(
index_urls=[
"http://foo:bar@example.com/org-alpha/project-name-alpha/repo-alias/simple",
"http://bar:foo@example.com/org-alpha/project-name-beta/repo-alias/simple",
]
)
get = functools.partial(
auth._get_new_credentials, allow_netrc=False, allow_keyring=False
)

# Inspired by Azure DevOps URL structure, GitLab should look similar
assert get(
"http://example.com/org-alpha/project-name-alpha/repo-guid/dowbload/"
) == ("foo", "bar")
assert get(
"http://example.com/org-alpha/project-name-beta/repo-guid/dowbload/"
) == ("bar", "foo")


class KeyringModuleV1:
"""Represents the supported API of keyring before get_credential
was added.
Expand All @@ -123,7 +167,7 @@ def __init__(self) -> None:
def get_password(self, system: str, username: str) -> Optional[str]:
if system == "example.com" and username:
return username + "!netloc"
if system == "http://example.com/path2" and username:
if system == "http://example.com/path2/" and username:
return username + "!url"
return None

Expand All @@ -136,8 +180,8 @@ def set_password(self, system: str, username: str, password: str) -> None:
(
("http://example.com/path1", (None, None)),
# path1 URLs will be resolved by netloc
("http://user@example.com/path1", ("user", "user!netloc")),
("http://user2@example.com/path1", ("user2", "user2!netloc")),
("http://user@example.com/path3", ("user", "user!netloc")),
("http://user2@example.com/path3", ("user2", "user2!netloc")),
# path2 URLs will be resolved by index URL
("http://example.com/path2/path3", (None, None)),
("http://foo@example.com/path2/path3", ("foo", "foo!url")),
Expand All @@ -151,7 +195,8 @@ def test_keyring_get_password(
keyring = KeyringModuleV1()
monkeypatch.setitem(sys.modules, "keyring", keyring) # type: ignore[misc]
auth = MultiDomainBasicAuth(
index_urls=["http://example.com/path2"], keyring_provider="import"
index_urls=["http://example.com/path2", "http://example.com/path3"],
keyring_provider="import",
)

actual = auth._get_new_credentials(url, allow_netrc=False, allow_keyring=True)
Expand Down Expand Up @@ -199,7 +244,8 @@ def test_keyring_get_password_username_in_index(
keyring = KeyringModuleV1()
monkeypatch.setitem(sys.modules, "keyring", keyring) # type: ignore[misc]
auth = MultiDomainBasicAuth(
index_urls=["http://user@example.com/path2"], keyring_provider="import"
index_urls=["http://user@example.com/path2", "http://example.com/path4"],
keyring_provider="import",
)
get = functools.partial(
auth._get_new_credentials, allow_netrc=False, allow_keyring=True
Expand Down Expand Up @@ -290,7 +336,7 @@ def get_password(self, system: str, username: str) -> None:
assert False, "get_password should not ever be called"

def get_credential(self, system: str, username: str) -> Optional[Credential]:
if system == "http://example.com/path2":
if system == "http://example.com/path2/":
return self.Credential("username", "url")
if system == "example.com":
return self.Credential("username", "netloc")
Expand All @@ -310,7 +356,8 @@ def test_keyring_get_credential(
) -> None:
monkeypatch.setitem(sys.modules, "keyring", KeyringModuleV2()) # type: ignore[misc]
auth = MultiDomainBasicAuth(
index_urls=["http://example.com/path2"], keyring_provider="import"
index_urls=["http://example.com/path1", "http://example.com/path2"],
keyring_provider="import",
)

assert (
Expand Down Expand Up @@ -375,6 +422,7 @@ def __call__(
self.returncode = 1
else:
# Passwords are returned encoded with a newline appended
self.returncode = 0
self.stdout = (password + os.linesep).encode("utf-8")

if cmd[1] == "set":
Expand All @@ -400,8 +448,8 @@ def check_returncode(self) -> None:
(
("http://example.com/path1", (None, None)),
# path1 URLs will be resolved by netloc
("http://user@example.com/path1", ("user", "user!netloc")),
("http://user2@example.com/path1", ("user2", "user2!netloc")),
("http://user@example.com/path3", ("user", "user!netloc")),
("http://user2@example.com/path3", ("user2", "user2!netloc")),
# path2 URLs will be resolved by index URL
("http://example.com/path2/path3", (None, None)),
("http://foo@example.com/path2/path3", ("foo", "foo!url")),
Expand All @@ -417,7 +465,8 @@ def test_keyring_cli_get_password(
pip._internal.network.auth.subprocess, "run", KeyringSubprocessResult()
)
auth = MultiDomainBasicAuth(
index_urls=["http://example.com/path2"], keyring_provider="subprocess"
index_urls=["http://example.com/path2", "http://example.com/path3"],
keyring_provider="subprocess",
)

actual = auth._get_new_credentials(url, allow_netrc=False, allow_keyring=True)
Expand Down

0 comments on commit 12c5206

Please sign in to comment.