Matej Cepl 2022-02-09 17:44:12 +00:00 committed by Git OBS Bridge
parent 510e372768
commit 5c19a933c4

View File

@ -59,9 +59,9 @@
+The presence of newline or tab characters in parts of a URL allows for some
+forms of attacks. Following the WHATWG specification that updates RFC 3986,
+ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the
+URL by the parser :func:`urllib.parse` preventing such attacks. The removal
+URL by the parser :func:`urlparse` preventing such attacks. The removal
+characters are controlled by a new module level variable
+``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`)
+``urlparse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`)
+
Python 3.1 Features
@ -75,7 +75,7 @@
+ def test_urlsplit_remove_unsafe_bytes(self):
+ # Remove ASCII tabs and newlines from input, for http common case scenario.
+ url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ p = urlparse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "www.python.org")
+ self.assertEqual(p.path, "/javascript:alert('msg')/")
@ -89,7 +89,7 @@
+
+ # Remove ASCII tabs and newlines from input as bytes, for http common case scenario.
+ url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ p = urlparse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"www.python.org")
+ self.assertEqual(p.path, b"/javascript:alert('msg')/")
@ -103,12 +103,12 @@
+
+ # any scheme
+ url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ p = urlparse.urlsplit(url)
+ self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from input as bytes, any scheme.
+ url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ p = urlparse.urlsplit(url)
+ self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Unsafe bytes is not returned from urlparse cache.
@ -116,7 +116,7 @@
+ url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ scheme = "htt\nps"
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ p = urlparse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
@ -165,5 +165,5 @@
+some forms of attacks.
+
+Following the controlling specification for URLs defined by WHATWG
+:func:`urllib.parse` now removes ASCII newlines and tabs from URLs,
+:func:`urlparse` now removes ASCII newlines and tabs from URLs,
+preventing such attacks.