Skip to content
10 changes: 10 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import unittest
import urllib.parse
from test import support
from string import ascii_letters, digits

RFC1808_BASE = "http://a/b/c/d;p?q#f"
RFC2396_BASE = "http://a/b/c/d;p?q"
Expand Down Expand Up @@ -1419,6 +1420,15 @@ def test_invalid_bracketed_hosts(self):
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix')
# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
unreserved = ascii_letters + digits + "-" + "." + "_" + "~"
zoneid_authorized_characters = unreserved
removed_characters = "\t\n\r"
for character in range(256):
character = chr(character)
if character in zoneid_authorized_characters or character in removed_characters:
continue
self.assertRaises(ValueError, urllib.parse.urlsplit, f'scheme://[::1%invalid{character}invalid]/')

def test_splitting_bracketed_hosts(self):
p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query')
Expand Down
9 changes: 7 additions & 2 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# Zone ID pattern per RFC 6874: ZoneID = 1*( unreserved / pct-encoded ).
# re.ASCII restricts \w to [a-zA-Z0-9_]; together with ".", "~" and "-" the
# character class therefore covers the RFC 3986 "unreserved" set, and the
# first alternative accepts a percent sign followed by two hex digits.
zone_id_regex = re.compile(r"(%[a-fA-F0-9]{2}|[\w\.~-])+", flags=re.ASCII)

def clear_cache():
"""Clear internal performance caches. Undocumented; some tests want it."""
urlsplit.cache_clear()
Expand Down Expand Up @@ -461,11 +464,13 @@ def _check_bracketed_netloc(netloc):
def _check_bracketed_host(hostname):
if hostname.startswith('v'):
if not re.match(r"\Av[a-fA-F0-9]+\..+\z", hostname):
raise ValueError(f"IPvFuture address is invalid")
raise ValueError("IPvFuture address is invalid")
else:
ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4
if isinstance(ip, ipaddress.IPv4Address):
raise ValueError(f"An IPv4 address cannot be in brackets")
raise ValueError("An IPv4 address cannot be in brackets")
if "%" in hostname and not zone_id_regex.fullmatch(hostname.split("%", 1)[1]):
raise ValueError("IPv6 ZoneID is invalid")

# typed=True avoids BytesWarnings being emitted during cache key
# comparison since this API supports both bytes and str input.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` now reject
bracketed IPv6 hosts whose zone ID contains invalid or unsafe characters,
as per :rfc:`6874`.
Loading