Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate ASCII host values #954

Merged
merged 53 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
7f21ff2
Validate ASCII host values
mjpieters Nov 20, 2023
e14f451
Merge branch 'master' into validate_host
bdraco Sep 1, 2024
749afd0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 1, 2024
b746e8c
Merge branch 'master' into validate_host
bdraco Sep 7, 2024
360fdcd
fix incorrect test
bdraco Sep 7, 2024
8054bac
cache validation
bdraco Sep 7, 2024
32ca9fe
cache validation
bdraco Sep 7, 2024
f76b060
Apply suggestions from code review
bdraco Sep 7, 2024
755b0e3
cleanup comment
bdraco Sep 7, 2024
ff5c541
Merge branch 'master' into validate_host
bdraco Sep 7, 2024
1110685
fix tests
bdraco Sep 7, 2024
701aef5
Merge remote-tracking branch 'origin/validate_host' into validate_host
bdraco Sep 7, 2024
38bd693
not sure if we should validate from string construction
bdraco Sep 7, 2024
d57a8b0
urlsplit already does quite a bit of validation, add case for that, skip
bdraco Sep 7, 2024
63c03fa
make comment match test fix
bdraco Sep 7, 2024
aef0c14
make comment match test fix
bdraco Sep 7, 2024
fa31e95
Merge branch 'master' into validate_host
bdraco Sep 8, 2024
b8d12cf
Merge branch 'master' into validate_host
bdraco Sep 9, 2024
5b7391f
Merge branch 'master' into validate_host
bdraco Sep 22, 2024
ea3868f
Fix round-trip of IPv6 addresses
bdraco Sep 26, 2024
f3d6a1b
Update tests/test_url_build.py
bdraco Sep 26, 2024
1776185
Update yarl/_url.py
bdraco Sep 26, 2024
d432377
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 26, 2024
0566802
host_subcomponent
bdraco Sep 26, 2024
c4591f6
Merge remote-tracking branch 'origin/ipv6_fix' into ipv6_fix
bdraco Sep 26, 2024
4a85c17
tests
bdraco Sep 26, 2024
9c27548
Update yarl/_url.py
bdraco Sep 26, 2024
31da379
Apply suggestions from code review
bdraco Sep 26, 2024
86a0677
Update yarl/_url.py
bdraco Sep 26, 2024
6401a10
Update yarl/_url.py
bdraco Sep 26, 2024
59dbfe4
Update yarl/_url.py
bdraco Sep 26, 2024
e55083f
Update yarl/_url.py
bdraco Sep 26, 2024
f8093ea
tweaks
bdraco Sep 26, 2024
cd4505b
docs
bdraco Sep 26, 2024
7d40597
Update docs/api.rst
bdraco Sep 26, 2024
8cc832f
lint
bdraco Sep 26, 2024
946405e
lint
bdraco Sep 26, 2024
918da42
more coverage
bdraco Sep 26, 2024
fdd12b9
cleanup
bdraco Sep 26, 2024
d5923f0
cleanup
bdraco Sep 26, 2024
d9332cc
cleanup
bdraco Sep 26, 2024
2aae488
Merge branch 'host_subcomp' into ipv6_fix
bdraco Sep 26, 2024
ca5942b
more cover
bdraco Sep 26, 2024
2b2e668
Merge branch 'host_subcomp' into ipv6_fix
bdraco Sep 26, 2024
299d97e
cover
bdraco Sep 26, 2024
d518fd8
fixes
bdraco Sep 26, 2024
700df5d
Merge branch 'ipv6_fix' into validate_host
bdraco Sep 26, 2024
90f13f4
Merge branch 'master' into ipv6_fix
bdraco Sep 26, 2024
c464239
changelog
bdraco Sep 26, 2024
8ec8fef
Apply suggestions from code review
bdraco Sep 26, 2024
ed85d2b
Update CHANGES/1158.bugfix.rst
bdraco Sep 26, 2024
d4563d0
Merge branch 'ipv6_fix' into validate_host
bdraco Sep 26, 2024
a13eadd
Merge branch 'master' into validate_host
bdraco Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES/880.bugfix.rst
mjpieters marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Started rejecting ASCII hostnames with invalid characters. For host strings that
look like authority strings, the exception message includes advice on what to do
instead -- by :user:`mjpieters`.
1 change: 1 addition & 0 deletions CHANGES/954.bugfix.rst
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ facto
glibc
google
hardcoded
hostnames
macOS
mailto
manylinux
Expand Down
19 changes: 19 additions & 0 deletions tests/test_url_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,25 @@ def test_build_with_authority_and_host():
URL.build(authority="host.com", host="example.com")


@pytest.mark.parametrize(
("host", "is_authority"),
[
("user:pass@host.com", True),
("user@host.com", True),
("host:com", False),
("not_percent_encoded%Zf", False),
("still_not_percent_encoded%fZ", False),
*(("other_gen_delim_" + c, False) for c in "/?#[]"),
],
)
def test_build_with_invalid_host(host: str, is_authority: bool):
match = r"Host '[^']+' cannot contain '[^']+' \(at position \d+\)"
if is_authority:
match += ", if .* use 'authority' instead of 'host'"
with pytest.raises(ValueError, match=f"{match}$"):
URL.build(host=host)


def test_build_with_authority():
url = URL.build(scheme="http", authority="степан:bar@host.com:8000", path="path")
assert (
Expand Down
20 changes: 20 additions & 0 deletions tests/test_url_update_netloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,26 @@ def test_with_host_non_ascii():
assert url2.authority == "оун-упа.укр:123"


@pytest.mark.parametrize(
("host", "is_authority"),
[
("user:pass@host.com", True),
("user@host.com", True),
("host:com", False),
("not_percent_encoded%Zf", False),
("still_not_percent_encoded%fZ", False),
*(("other_gen_delim_" + c, False) for c in "/?#[]"),
],
)
def test_with_invalid_host(host: str, is_authority: bool):
url = URL("http://example.com:123")
match = r"Host '[^']+' cannot contain '[^']+' \(at position \d+\)"
if is_authority:
match += ", if .* use 'authority' instead of 'host'"
with pytest.raises(ValueError, match=f"{match}$"):
url.with_host(host=host)


def test_with_host_percent_encoded():
url = URL("http://%25cf%2580%cf%80:%25cf%2580%cf%80@example.com:123")
url2 = url.with_host("%cf%80.org")
Expand Down
39 changes: 36 additions & 3 deletions yarl/_url.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import functools
import math
import re
import socket
import warnings
from collections.abc import Mapping, Sequence
Expand All @@ -18,6 +19,22 @@

sentinel = object()

# reg-name: unreserved / pct-encoded / sub-delims
# this pattern matches anything that is *not* in those classes. and is only used
# on lower-cased ASCII values.
_not_reg_name = re.compile(
r"""
# any character not in the unreserved or sub-delims sets, plus %
# (validated with the additional check for pct-encoded sequences below)
[^a-z0-9\-._~!$&'()*+,;=%]
|
# % only allowed if it is part of a pct-encoded
# sequence of 2 hex digits.
%(?![0-9a-f]{2})
""",
re.VERBOSE,
)


def rewrite_module(obj: object) -> object:
obj.__module__ = "yarl"
Expand Down Expand Up @@ -778,11 +795,27 @@
ip = ip_address(ip)
except ValueError:
host = host.lower()
# IDNA encoding is slow,
# skip it for ASCII-only strings
# skip it for humanized and ASCII-only strings
# Don't move the check into _idna_encode() helper
# to reduce the cache size
if human or host.isascii():
if human:
return host
if host.isascii():
# Check for invalid characters explicitly; _idna_encode() does this
bdraco marked this conversation as resolved.
Show resolved Hide resolved
# for non-ascii host names.
invalid = _not_reg_name.search(host)
if invalid is not None:
value, pos, extra = invalid.group(), invalid.start(), ""
if value == "@" or (value == ":" and "@" in host[pos:]):
# this looks like an authority string
extra = (
", if the value includes a username or password, "

Check warning on line 812 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L812

Added line #L812 was not covered by tests
"use 'authority' instead of 'host'"
)
raise ValueError(
f"Host {host!r} cannot contain {value!r} (at position "
f"{pos}){extra}"
) from None

Check warning on line 818 in yarl/_url.py

View check run for this annotation

Codecov / codecov/patch

yarl/_url.py#L816-L818

Added lines #L816 - L818 were not covered by tests
return host
host = _idna_encode(host)
else:
Expand Down
Loading