From 5c977b52a33bf58f016e5968934c3fcb8b49b239 Mon Sep 17 00:00:00 2001 From: Martijn Pieters Date: Tue, 6 Jun 2023 17:38:47 +0100 Subject: [PATCH] Correct square bracket handling in URL netloc - The human representation of usernames and passwords should percent-encode square brackets. - Clean up the test suite to remove tests that use invalid hostnames (square brackets in a host name must only be used for IPv6 addresses). - Rename the remaining test using IPvFuture address syntax to make this explicit. - Drop a test for IPv6 addresses with a zone id; zone id support is controversial and explicitly excluded from the WHATWG URL standard. Zone ids *without percent characters in their name* continue to work as long as urllib.parse.urlsplit() accepts them but this is not something that yarl.URL() needs to support explicitly. --- CHANGES/876.bugfix.rst | 1 + tests/test_url.py | 10 ++-------- tests/test_url_parsing.py | 28 ++-------------------------- yarl/_url.py | 4 ++-- 4 files changed, 7 insertions(+), 36 deletions(-) create mode 100644 CHANGES/876.bugfix.rst --- /dev/null +++ b/CHANGES/876.bugfix.rst @@ -0,0 +1 @@ +Fixed the human representation of URLs with square brackets in usernames and passwords. --- a/tests/test_url.py +++ b/tests/test_url.py @@ -235,12 +235,6 @@ def test_compressed_ipv6(): assert url.host == url.raw_host -def test_ipv6_zone(): - url = URL("http://[fe80::822a:a8ff:fe49:470c%тест%42]:123") - assert url.raw_host == "fe80::822a:a8ff:fe49:470c%тест%42" - assert url.host == url.raw_host - - def test_ipv4_zone(): # I'm unsure if it is correct. 
url = URL("http://1.2.3.4%тест%42:123") @@ -1629,8 +1623,8 @@ def test_human_repr_delimiters(): s = url.human_repr() assert URL(s) == url assert ( - s == "http:// !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40[\\]^_`{|}~" - ": !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40[\\]^_`{|}~" + s == "http:// !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40%5B\\%5D^_`{|}~" + ": !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40%5B\\%5D^_`{|}~" "@хост.домен:8080" "/ !\"%23$%25&'()*+,-./:;<=>%3F@[\\]^_`{|}~" "? !\"%23$%25%26'()*%2B,-./:%3B<%3D>?@[\\]^_`{|}~" --- a/tests/test_url_parsing.py +++ b/tests/test_url_parsing.py @@ -178,14 +178,6 @@ class TestHost: assert u.query_string == "" assert u.fragment == "" - def test_masked_ipv4(self): - u = URL("//[127.0.0.1]/") - assert u.scheme == "" - assert u.host == "127.0.0.1" - assert u.path == "/" - assert u.query_string == "" - assert u.fragment == "" - def test_ipv6(self): u = URL("//[::1]/") assert u.scheme == "" @@ -194,15 +186,7 @@ class TestHost: assert u.query_string == "" assert u.fragment == "" - def test_strange_ip(self): - u = URL("//[-1]/") - assert u.scheme == "" - assert u.host == "-1" - assert u.path == "/" - assert u.query_string == "" - assert u.fragment == "" - - def test_strange_ip_2(self): + def test_ipvfuture_address(self): u = URL("//[v1.-1]/") assert u.scheme == "" assert u.host == "v1.-1" @@ -210,14 +194,6 @@ class TestHost: assert u.query_string == "" assert u.fragment == "" - def test_strange_ip_3(self): - u = URL("//v1.[::1]/") - assert u.scheme == "" - assert u.host == "::1" - assert u.path == "/" - assert u.query_string == "" - assert u.fragment == "" - class TestPort: def test_canonical(self): @@ -320,7 +296,7 @@ class TestUserInfo: assert u.fragment == "" def test_weird_user3(self): - u = URL("//[some]@host") + u = URL("//%5Bsome%5D@host") assert u.scheme == "" assert u.user == "[some]" assert u.password is None --- a/yarl/_url.py +++ b/yarl/_url.py @@ -1117,8 +1117,8 @@ class URL: def human_repr(self): """Return decoded human readable string for URL 
representation.""" - user = _human_quote(self.user, "#/:?@") - password = _human_quote(self.password, "#/:?@") + user = _human_quote(self.user, "#/:?@[]") + password = _human_quote(self.password, "#/:?@[]") host = self.host if host: host = self._encode_host(self.host, human=True)