Skip to content

Commit

Permalink
Fix round-trip of IPv6 addresses
Browse files Browse the repository at this point in the history
The brackets were missing when the URL was converted to a string

fixes #1157
  • Loading branch information
bdraco committed Sep 26, 2024
1 parent bf2ced7 commit ea3868f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 9 deletions.
15 changes: 15 additions & 0 deletions tests/test_url_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,3 +604,18 @@ def test_schemes_that_require_host(scheme: str) -> None:
)
with pytest.raises(ValueError, match=expect):
URL(f"{scheme}://:1")


@pytest.mark.parametrize(
    ("url", "hostname"),
    [("http://[::1]", "[::1]"), ("http://[::1]:8080", "[::1]")],
)
def test_ipv6_url_round_trips(url: str, hostname: str) -> None:
    """Check that a URL with a literal IPv6 host survives str() unchanged.

    ``hostname`` is the bracketed form expected from ``literal_host``;
    the parsed hostname and ``raw_host`` are expected to hold the same
    address without the surrounding brackets.
    """
    # Drop the leading "[" and trailing "]" to get the bare address.
    bare_address = hostname[1:-1]
    parsed = URL(url)

    # The host is stored and exposed without brackets...
    assert parsed._val.hostname == bare_address
    assert parsed.raw_host == bare_address
    # ...while the literal form carries them.
    assert parsed.literal_host == hostname

    # Serializing must reproduce the input, and so must a second pass
    # through parse + serialize.
    rendered = str(parsed)
    assert rendered == url
    assert str(URL(rendered)) == url
38 changes: 29 additions & 9 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def __str__(self) -> str:
netloc=self._make_netloc(
self.raw_user,
self.raw_password,
self.raw_host,
self.literal_host,
port,
encode_host=False,
)
Expand Down Expand Up @@ -647,6 +647,8 @@ def raw_host(self) -> Union[str, None]:
None for relative URLs.
For literal IPv6 addresses, use the literal_host property instead
as it will return the host part with brackets.
"""
# Use host instead of hostname for sake of shortness
# May add .hostname prop later
Expand All @@ -660,16 +662,33 @@ def host(self) -> Union[str, None]:
None for relative URLs.
"""
raw = self.raw_host
if raw is None:
if (raw := self.raw_host) is None:
return None
if "%" in raw:
# Hack for scoped IPv6 addresses like
# fe80::2%Перевірка
# presence of '%' sign means only IPv6 address, so idna is useless.
if raw and (":" in raw or raw[-1].isdigit()):
# IP addresses are never IDNA encoded
return raw
return _idna_decode(raw)

@cached_property
def literal_host(self) -> Union[str, None]:
    """Return the host as it is written literally inside a URL.

    This is ``raw_host`` with square brackets added around it when it
    contains a colon (a literal IPv6 address); any other host is
    returned unchanged.

    None for relative URLs.

    https://datatracker.ietf.org/doc/html/rfc2732#section-2

    Examples:

        'http://example.com:8080' -> 'example.com'
        'http://example.com:80' -> 'example.com'
        'https://127.0.0.1:8443' -> '127.0.0.1'
        'https://[::1]:8443' -> '[::1]'
        'http://[::1]' -> '[::1]'

    """
    host = self.raw_host
    # No host at all (None), or a host without a colon: nothing to wrap.
    if host is None or ":" not in host:
        return host
    # A colon in the host marks an IPv6 literal, which needs brackets
    # so the address is not confused with the port separator.
    return f"[{host}]"

@cached_property
def port(self) -> Union[int, None]:
"""Port part of URL, with scheme-based fallback.
Expand Down Expand Up @@ -945,15 +964,16 @@ def _encode_host(cls, host: str, human: bool = False) -> str:
raw_ip = host
sep = zone = ""

if raw_ip and raw_ip[-1].isdigit() or ":" in raw_ip:
if raw_ip and (":" in raw_ip or raw_ip[-1].isdigit()):
# Might be an IP address, check it
#
# IP Addresses can look like:
# https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
# - 127.0.0.1 (last character is a digit)
# - 2001:db8::ff00:42:8329 (contains a colon)
# - 2001:db8::ff00:42:8329%eth0 (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
# have been removed before it gets here)
# Rare IP Address formats are not supported per:
# https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
#
Expand Down

0 comments on commit ea3868f

Please sign in to comment.