Compare commits

..

No commits in common. "factory" and "devel" have entirely different histories.

12 changed files with 2047 additions and 0 deletions

View File

@ -0,0 +1,461 @@
---
Doc/library/email.utils.rst | 19 -
Lib/email/utils.py | 151 +++++++-
Lib/test/test_email/test_email.py | 187 +++++++++-
Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8
4 files changed, 344 insertions(+), 21 deletions(-)
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -60,13 +60,18 @@ of the new API.
begins with angle brackets, they are stripped off.
-.. function:: parseaddr(address)
+.. function:: parseaddr(address, *, strict=True)
Parse address -- which should be the value of some address-containing field such
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -84,12 +89,15 @@ of the new API.
Added the *charset* option.
-.. function:: getaddresses(fieldvalues)
+.. function:: getaddresses(fieldvalues, *, strict=True)
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ :meth:`Message.get_all <email.message.Message.get_all>`.
+
+ If *strict* is true, use a strict parser which rejects malformed inputs.
+
+ Here's a simple example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -99,6 +107,9 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ .. versionchanged:: 3.13
+ Add *strict* optional parameter and reject malformed inputs by default.
+
.. function:: parsedate(date)
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
+
def _has_surrogates(s):
"""Return True if s contains surrogate-escaped binary data."""
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
return address
+def _iter_escaped_chars(addr):
+ pos = 0
+ escape = False
+ for pos, ch in enumerate(addr):
+ if escape:
+ yield (pos, '\\' + ch)
+ escape = False
+ elif ch == '\\':
+ escape = True
+ else:
+ yield (pos, ch)
+ if escape:
+ yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+ """Strip real names between quotes."""
+ if '"' not in addr:
+ # Fast path
+ return addr
+
+ start = 0
+ open_pos = None
+ result = []
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '"':
+ if open_pos is None:
+ open_pos = pos
+ else:
+ if start != open_pos:
+ result.append(addr[start:open_pos])
+ start = pos + 1
+ open_pos = None
-def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
- a = _AddressList(all)
- return a.addresslist
+ if start < len(addr):
+ result.append(addr[start:])
+
+ return ''.join(result)
+
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If strict is true, use a strict parser which rejects malformed inputs.
+ """
+
+ # If strict is true, if the resulting list of parsed addresses is greater
+ # than the number of fieldvalues in the input list, a parsing error has
+ # occurred and consequently a list containing a single empty 2-tuple [('',
+ # '')] is returned in its place. This is done to avoid invalid output.
+ #
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+ # Safe output: [('', '')]
+
+ if not strict:
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ addr = COMMASPACE.join(fieldvalues)
+ a = _AddressList(addr)
+ result = _post_parse_validation(a.addresslist)
+
+ # Treat output as invalid if the number of addresses is not equal to the
+ # expected number of addresses.
+ n = 0
+ for v in fieldvalues:
+ # When a comma is used in the Real Name part it is not a deliminator.
+ # So strip those out before counting the commas.
+ v = _strip_quoted_realnames(v)
+ # Expected number of addresses: 1 + number of commas
+ n += 1 + v.count(',')
+ if len(result) != n:
+ return [('', '')]
+
+ return result
+
+
+def _check_parenthesis(addr):
+ # Ignore parenthesis in quoted real names.
+ addr = _strip_quoted_realnames(addr)
+
+ opens = 0
+ for pos, ch in _iter_escaped_chars(addr):
+ if ch == '(':
+ opens += 1
+ elif ch == ')':
+ opens -= 1
+ if opens < 0:
+ return False
+ return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ if not _check_parenthesis(v):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
def _format_timetuple_and_zone(timetuple, zone):
@@ -202,16 +318,33 @@ def parsedate_to_datetime(data):
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
"""
Parse addr into its constituent realname and email address parts.
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
+
+ If strict is True, use a strict parser which rejects malformed inputs.
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if not strict:
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return ('', '')
+ return addrs[0]
+
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -16,6 +16,7 @@ from unittest.mock import patch
import email
import email.policy
+import email.utils
from email.charset import Charset
from email.header import Header, decode_header, make_header
@@ -3263,15 +3264,137 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
+ alice = 'alice@example.org'
+ bob = 'bob@example.com'
+ empty = ('', '')
+
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
+ # addresses: default behavior (strict=True) rejects malformed address,
+ # and strict=False which tolerates malformed address.
+ for invalid_separator, expected_non_strict in (
+ ('(', [(f'<{bob}>', alice)]),
+ (')', [('', alice), empty, ('', bob)]),
+ ('<', [('', alice), empty, ('', bob), empty]),
+ ('>', [('', alice), empty, ('', bob)]),
+ ('[', [('', f'{alice}[<{bob}>]')]),
+ (']', [('', alice), empty, ('', bob)]),
+ ('@', [empty, empty, ('', bob)]),
+ (';', [('', alice), empty, ('', bob)]),
+ (':', [('', alice), ('', bob)]),
+ ('.', [('', alice + '.'), ('', bob)]),
+ ('"', [('', alice), ('', f'<{bob}>')]),
+ ):
+ address = f'{alice}{invalid_separator}<{bob}>'
+ with self.subTest(address=address):
+ self.assertEqual(utils.getaddresses([address]),
+ [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ expected_non_strict)
+
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Comma (',') is treated differently depending on strict parameter.
+ # Comma without quotes.
+ address = f'{alice},<{bob}>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', alice), ('', bob)])
+ self.assertEqual(utils.parseaddr([address]),
+ empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Real name between quotes containing comma.
+ address = '"Alice, alice@example.org" <bob@example.com>'
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Valid parenthesis in comments.
+ address = 'alice@example.org (Alice)'
+ expected_strict = ('Alice', 'alice@example.org')
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Invalid parenthesis in comments.
+ address = 'alice@example.org )Alice('
+ self.assertEqual(utils.getaddresses([address]), [empty])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Two addresses with quotes separated by comma.
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
+ self.assertEqual(utils.getaddresses([address]),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.getaddresses([address], strict=False),
+ [('Jane Doe', 'jane@example.net'),
+ ('John Doe', 'john@example.net')])
+ self.assertEqual(utils.parseaddr([address]), empty)
+ self.assertEqual(utils.parseaddr([address], strict=False),
+ ('', address))
+
+ # Test email.utils.supports_strict_parsing attribute
+ self.assertEqual(email.utils.supports_strict_parsing, True)
+
def test_getaddresses_nasty(self):
- eq = self.assertEqual
- eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(utils.getaddresses(
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ for addresses, expected in (
+ (['"Sürname, Firstname" <to@example.com>'],
+ [('Sürname, Firstname', 'to@example.com')]),
+
+ (['foo: ;'],
+ [('', '')]),
+
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
+
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
+
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
+ [('', '')]),
+
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
+
+ (['John Doe <jdoe@machine(comment). example>'],
+ [('John Doe (comment)', 'jdoe@machine.example')]),
+
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
+
+ (['Undisclosed recipients:;'],
+ [('', '')]),
+
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
+ ):
+ with self.subTest(addresses=addresses):
+ self.assertEqual(utils.getaddresses(addresses),
+ expected)
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ expected)
+
+ addresses = ['[]*-- =~$']
+ self.assertEqual(utils.getaddresses(addresses),
+ [('', '')])
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
+ [('', ''), ('', ''), ('', '*--')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
@@ -3460,6 +3583,54 @@ multipart/report
m = cls(*constructor, policy=email.policy.default)
self.assertIs(m.policy, email.policy.default)
+ def test_iter_escaped_chars(self):
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
+ [(0, 'a'),
+ (2, '\\\\'),
+ (3, 'b'),
+ (5, '\\"'),
+ (6, 'c'),
+ (8, '\\\\'),
+ (9, '"'),
+ (10, 'd')])
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
+ [(0, 'a'), (1, '\\')])
+
+ def test_strip_quoted_realnames(self):
+ def check(addr, expected):
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
+
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
+ ' <jane@example.net>, <john@example.net>')
+ check(r'"Jane \"Doe\"." <jane@example.net>',
+ ' <jane@example.net>')
+
+ # special cases
+ check(r'before"name"after', 'beforeafter')
+ check(r'before"name"', 'before')
+ check(r'b"name"', 'b') # single char
+ check(r'"name"after', 'after')
+ check(r'"name"a', 'a') # single char
+ check(r'"name"', '')
+
+ # no change
+ for addr in (
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
+ 'lone " quote',
+ ):
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
+
+
+ def test_check_parenthesis(self):
+ addr = 'alice@example.net'
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
+
+ # Ignore real name between quotes
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
+
# Test the iterator/generators
class TestIterators(TestEmailBase):
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
@@ -0,0 +1,8 @@
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
+return ``('', '')`` 2-tuples in more situations where invalid email
+addresses are encountered instead of potentially inaccurate values. Add
+optional *strict* parameter to these two functions: use ``strict=False`` to
+get the old behavior, accept malformed inputs.
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check
+if the *strict* paramater is available. Patch by Thomas Dwyer and Victor
+Stinner to improve the CVE-2023-27043 fix.

View File

@ -0,0 +1,173 @@
From 732c7d512e7cdf656a3f02a38c329b14a14a8573 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth@python.org>
Date: Fri, 19 Apr 2024 11:21:40 -0700
Subject: [PATCH] [3.9] gh-114572: Fix locking in cert_store_stats and
get_ca_certs
---
Misc/NEWS.d/next/Security/2024-04-19-11-21-13.gh-issue-114572.t1QMQD.rst | 4
Modules/_ssl.c | 91 +++++++++-
2 files changed, 92 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2024-04-19-11-21-13.gh-issue-114572.t1QMQD.rst
Index: Python-3.9.19/Misc/NEWS.d/next/Security/2024-04-19-11-21-13.gh-issue-114572.t1QMQD.rst
===================================================================
--- /dev/null
+++ Python-3.9.19/Misc/NEWS.d/next/Security/2024-04-19-11-21-13.gh-issue-114572.t1QMQD.rst
@@ -0,0 +1,4 @@
+:meth:`ssl.SSLContext.cert_store_stats` and
+:meth:`ssl.SSLContext.get_ca_certs` now correctly lock access to the
+certificate store, when the :class:`ssl.SSLContext` is shared across
+multiple threads.
Index: Python-3.9.19/Modules/_ssl.c
===================================================================
--- Python-3.9.19.orig/Modules/_ssl.c
+++ Python-3.9.19/Modules/_ssl.c
@@ -166,6 +166,10 @@ extern const SSL_METHOD *TLSv1_2_method(
# define PY_OPENSSL_1_1_API 1
#endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30300000L) && !defined(LIBRESSL_VERSION_NUMBER)
+# define OPENSSL_VERSION_3_3 1
+#endif
+
/* SNI support (client- and server-side) appeared in OpenSSL 1.0.0 and 0.9.8f
* This includes the SSL_set_SSL_CTX() function.
*/
@@ -210,6 +214,16 @@ extern const SSL_METHOD *TLSv1_2_method(
#define HAVE_OPENSSL_CRYPTO_LOCK
#endif
+/* OpenSSL 1.1+ allows locking X509_STORE, 1.0.2 doesn't. */
+#ifdef OPENSSL_VERSION_1_1
+#define HAVE_OPENSSL_X509_STORE_LOCK
+#endif
+
+/* OpenSSL 3.3 added the X509_STORE_get1_objects API */
+#ifdef OPENSSL_VERSION_3_3
+#define HAVE_OPENSSL_X509_STORE_GET1_OBJECTS 1
+#endif
+
#if defined(OPENSSL_VERSION_1_1) && !defined(OPENSSL_NO_SSL2)
#define OPENSSL_NO_SSL2
#endif
@@ -4675,6 +4689,54 @@ set_sni_callback(PySSLContext *self, PyO
#endif
}
+/* Shim of X509_STORE_get1_objects API from OpenSSL 3.3
+ * Only available with the X509_STORE_lock() API */
+#if defined(HAVE_OPENSSL_X509_STORE_LOCK) && !defined(OPENSSL_VERSION_3_3)
+#define HAVE_OPENSSL_X509_STORE_GET1_OBJECTS 1
+
+static X509_OBJECT *x509_object_dup(const X509_OBJECT *obj)
+{
+ int ok;
+ X509_OBJECT *ret = X509_OBJECT_new();
+ if (ret == NULL) {
+ return NULL;
+ }
+ switch (X509_OBJECT_get_type(obj)) {
+ case X509_LU_X509:
+ ok = X509_OBJECT_set1_X509(ret, X509_OBJECT_get0_X509(obj));
+ break;
+ case X509_LU_CRL:
+ /* X509_OBJECT_get0_X509_CRL was not const-correct prior to 3.0.*/
+ ok = X509_OBJECT_set1_X509_CRL(
+ ret, X509_OBJECT_get0_X509_CRL((X509_OBJECT *)obj));
+ break;
+ default:
+ /* We cannot duplicate unrecognized types in a polyfill, but it is
+ * safe to leave an empty object. The caller will ignore it. */
+ ok = 1;
+ break;
+ }
+ if (!ok) {
+ X509_OBJECT_free(ret);
+ return NULL;
+ }
+ return ret;
+}
+
+static STACK_OF(X509_OBJECT) *
+X509_STORE_get1_objects(X509_STORE *store)
+{
+ STACK_OF(X509_OBJECT) *ret;
+ if (!X509_STORE_lock(store)) {
+ return NULL;
+ }
+ ret = sk_X509_OBJECT_deep_copy(X509_STORE_get0_objects(store),
+ x509_object_dup, X509_OBJECT_free);
+ X509_STORE_unlock(store);
+ return ret;
+}
+#endif
+
PyDoc_STRVAR(PySSLContext_sni_callback_doc,
"Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension.\n\
\n\
@@ -4704,7 +4766,15 @@ _ssl__SSLContext_cert_store_stats_impl(P
int x509 = 0, crl = 0, ca = 0, i;
store = SSL_CTX_get_cert_store(self->ctx);
+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS
+ objs = X509_STORE_get1_objects(store);
+ if (objs == NULL) {
+ PyErr_SetString(PyExc_MemoryError, "failed to query cert store");
+ return NULL;
+ }
+#else
objs = X509_STORE_get0_objects(store);
+#endif
for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
obj = sk_X509_OBJECT_value(objs, i);
switch (X509_OBJECT_get_type(obj)) {
@@ -4718,12 +4788,13 @@ _ssl__SSLContext_cert_store_stats_impl(P
crl++;
break;
default:
- /* Ignore X509_LU_FAIL, X509_LU_RETRY, X509_LU_PKEY.
- * As far as I can tell they are internal states and never
- * stored in a cert store */
+ /* Ignore unrecognized types. */
break;
}
}
+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS
+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free);
+#endif
return Py_BuildValue("{sisisi}", "x509", x509, "crl", crl,
"x509_ca", ca);
}
@@ -4755,7 +4826,15 @@ _ssl__SSLContext_get_ca_certs_impl(PySSL
}
store = SSL_CTX_get_cert_store(self->ctx);
+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS
+ objs = X509_STORE_get1_objects(store);
+ if (objs == NULL) {
+ PyErr_SetString(PyExc_MemoryError, "failed to query cert store");
+ return NULL;
+ }
+#else
objs = X509_STORE_get0_objects(store);
+#endif
for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
X509_OBJECT *obj;
X509 *cert;
@@ -4783,9 +4862,15 @@ _ssl__SSLContext_get_ca_certs_impl(PySSL
}
Py_CLEAR(ci);
}
+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS
+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free);
+#endif
return rlist;
error:
+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS
+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free);
+#endif
Py_XDECREF(ci);
Py_XDECREF(rlist);
return NULL;

View File

@ -0,0 +1,97 @@
From 37bc08c699f48461be5e000b2da9212237a1ca0f Mon Sep 17 00:00:00 2001
From: JohnJamesUtley <jjutley231@gmail.com>
Date: Tue, 25 Apr 2023 16:01:03 -0400
Subject: [PATCH 1/4] Adds checks to ensure that bracketed hosts found by
urlsplit are of IPv6 or IPvFuture format
---
Lib/test/test_urlparse.py | 26 ++++++++++
Lib/urllib/parse.py | 16 +++++-
Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst | 2
3 files changed, 43 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1135,6 +1135,32 @@ class UrlParseTestCase(unittest.TestCase
self.assertEqual(p2.scheme, 'tel')
self.assertEqual(p2.path, '+31641044153')
+ def test_invalid_bracketed_hosts(self):
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
+
+ def test_splitting_bracketed_hosts(self):
+ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
+ self.assertEqual(p1.hostname, 'v6a.ip')
+ self.assertEqual(p1.username, 'user')
+ self.assertEqual(p1.path, '/path')
+ p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
+ self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
+ self.assertEqual(p2.username, 'user')
+ self.assertEqual(p2.path, '/path')
+ p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
+ self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
+ self.assertEqual(p3.username, 'user')
+ self.assertEqual(p3.path, '/path')
+
def test_port_casting_failure_message(self):
message = "Port could not be cast to integer value as 'oracle'"
p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -36,6 +36,7 @@ import sys
import types
import collections
import warnings
+import ipaddress
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
@@ -442,6 +443,17 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
+# Valid bracketed hosts are defined in
+# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
+def _check_bracketed_host(hostname):
+ if hostname.startswith('v'):
+ if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", hostname):
+ raise ValueError(f"IPvFuture address is invalid")
+ else:
+ ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4
+ if isinstance(ip, ipaddress.IPv4Address):
+ raise ValueError(f"An IPv4 address cannot be in brackets")
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -488,12 +500,14 @@ def urlsplit(url, scheme='', allow_fragm
break
else:
scheme, url = url[:i].lower(), url[i+1:]
-
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
+ if '[' in netloc and ']' in netloc:
+ bracketed_host = netloc.partition('[')[2].partition(']')[0]
+ _check_bracketed_host(bracketed_host)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-04-26-09-54-25.gh-issue-103848.aDSnpR.rst
@@ -0,0 +1,2 @@
+Add checks to ensure that ``[`` bracketed ``]`` hosts found by
+:func:`urllib.parse.urlsplit` are of IPv6 or IPvFuture format.

View File

@ -0,0 +1,396 @@
From 5e58376d424fb951966277e5d46cf0b11d860ef3 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Wed, 24 Apr 2024 14:29:30 +0200
Subject: [PATCH 1/3] gh-113171: gh-65056: Fix "private" (non-global) IP
address ranges (GH-113179) (GH-113186) (GH-118177)
* GH-113171: Fix "private" (non-global) IP address ranges (GH-113179)
The _private_networks variables, used by various is_private
implementations, were missing some ranges and at the same time had
overly strict ranges (where there are more specific ranges considered
globally reachable by the IANA registries).
This patch updates the ranges with what was missing or otherwise
incorrect.
100.64.0.0/10 is left alone, for now, as it's been made special in [1].
The _address_exclude_many() call returns 8 networks for IPv4, 121
networks for IPv6.
[1] https://github.com/python/cpython/issues/61602
* GH-65056: Improve the IP address' is_global/is_private documentation (GH-113186)
It wasn't clear what the semantics of is_global/is_private are and, when
one gets to the bottom of it, it's not quite so simple (hence the
exceptions listed).
(cherry picked from commit 2a4cbf17af19a01d942f9579342f77c39fbd23c4)
(cherry picked from commit 40d75c2b7f5c67e254d0a025e0f2e2c7ada7f69f)
---------
(cherry picked from commit f86b17ac511e68192ba71f27e752321a3252cee3)
Co-authored-by: Jakub Stasiak <jakub@stasiak.at>
---
Doc/library/ipaddress.rst | 43 +++-
Doc/tools/susp-ignored.csv | 8
Doc/whatsnew/3.9.rst | 9
Lib/ipaddress.py | 107 +++++++---
Lib/test/test_ipaddress.py | 52 ++++
Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst | 9
6 files changed, 201 insertions(+), 27 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst
--- a/Doc/library/ipaddress.rst
+++ b/Doc/library/ipaddress.rst
@@ -188,18 +188,53 @@ write code that handles both IP versions
.. attribute:: is_private
- ``True`` if the address is allocated for private networks. See
+ ``True`` if the address is defined as not globally reachable by
iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
- (for IPv6).
+ (for IPv6) with the following exceptions:
+
+ * ``is_private`` is ``False`` for the shared address space (``100.64.0.0/10``)
+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
+
+ address.is_private == address.ipv4_mapped.is_private
+
+ ``is_private`` has value opposite to :attr:`is_global`, except for the shared address space
+ (``100.64.0.0/10`` range) where they are both ``False``.
+
+ .. versionchanged:: 3.9.20
+
+ Fixed some false positives and false negatives.
+
+ * ``192.0.0.0/24`` is considered private with the exception of ``192.0.0.9/32`` and
+ ``192.0.0.10/32`` (previously: only the ``192.0.0.0/29`` sub-range was considered private).
+ * ``64:ff9b:1::/48`` is considered private.
+ * ``2002::/16`` is considered private.
+ * There are exceptions within ``2001::/23`` (otherwise considered private): ``2001:1::1/128``,
+ ``2001:1::2/128``, ``2001:3::/32``, ``2001:4:112::/48``, ``2001:20::/28``, ``2001:30::/28``.
+ The exceptions are not considered private.
.. attribute:: is_global
- ``True`` if the address is allocated for public networks. See
+ ``True`` if the address is defined as globally reachable by
iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
- (for IPv6).
+ (for IPv6) with the following exception:
+
+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
+
+ address.is_global == address.ipv4_mapped.is_global
+
+ ``is_global`` has value opposite to :attr:`is_private`, except for the shared address space
+ (``100.64.0.0/10`` range) where they are both ``False``.
.. versionadded:: 3.4
+ .. versionchanged:: 3.9.20
+
+ Fixed some false positives and false negatives, see :attr:`is_private` for details.
+
.. attribute:: is_unspecified
``True`` if the address is unspecified. See :RFC:`5735` (for IPv4)
--- a/Doc/tools/susp-ignored.csv
+++ b/Doc/tools/susp-ignored.csv
@@ -169,6 +169,14 @@ library/ipaddress,,:db00,2001:db00::0/24
library/ipaddress,,::,2001:db00::0/24
library/ipaddress,,:db00,2001:db00::0/ffff:ff00::
library/ipaddress,,::,2001:db00::0/ffff:ff00::
+library/ipaddress,,:ff9b,64:ff9b:1::/48
+library/ipaddress,,::,64:ff9b:1::/48
+library/ipaddress,,::,2001::
+library/ipaddress,,::,2001:1::
+library/ipaddress,,::,2001:3::
+library/ipaddress,,::,2001:4:112::
+library/ipaddress,,::,2001:20::
+library/ipaddress,,::,2001:30::
library/itertools,,:step,elements from seq[start:stop:step]
library/itertools,,:stop,elements from seq[start:stop:step]
library/itertools,,::,kernel = tuple(kernel)[::-1]
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -1616,3 +1616,12 @@ tarfile
:exc:`DeprecationWarning`.
In Python 3.14, the default will switch to ``'data'``.
(Contributed by Petr Viktorin in :pep:`706`.)
+
+Notable changes in 3.9.20
+=========================
+
+ipaddress
+---------
+
+* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``,
+ ``IPv6Address``, ``IPv4Network`` and ``IPv6Network``.
--- a/Lib/ipaddress.py
+++ b/Lib/ipaddress.py
@@ -1322,18 +1322,41 @@ class IPv4Address(_BaseV4, _BaseAddress)
@property
@functools.lru_cache()
def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per
- iana-ipv4-special-registry.
-
- """
- return any(self in net for net in self._constants._private_networks)
+ """``True`` if the address is defined as not globally reachable by
+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
+ (for IPv6) with the following exceptions:
+
+ * ``is_private`` is ``False`` for ``100.64.0.0/10``
+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
+
+ address.is_private == address.ipv4_mapped.is_private
+
+ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10``
+ IPv4 range where they are both ``False``.
+ """
+ return (
+ any(self in net for net in self._constants._private_networks)
+ and all(self not in net for net in self._constants._private_networks_exceptions)
+ )
@property
@functools.lru_cache()
def is_global(self):
+ """``True`` if the address is defined as globally reachable by
+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
+ (for IPv6) with the following exception:
+
+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
+
+ address.is_global == address.ipv4_mapped.is_global
+
+ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10``
+ IPv4 range where they are both ``False``.
+ """
return self not in self._constants._public_network and not self.is_private
@property
@@ -1537,13 +1560,15 @@ class _IPv4Constants:
_public_network = IPv4Network('100.64.0.0/10')
+ # Not globally reachable address blocks listed on
+ # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
_private_networks = [
IPv4Network('0.0.0.0/8'),
IPv4Network('10.0.0.0/8'),
IPv4Network('127.0.0.0/8'),
IPv4Network('169.254.0.0/16'),
IPv4Network('172.16.0.0/12'),
- IPv4Network('192.0.0.0/29'),
+ IPv4Network('192.0.0.0/24'),
IPv4Network('192.0.0.170/31'),
IPv4Network('192.0.2.0/24'),
IPv4Network('192.168.0.0/16'),
@@ -1554,6 +1579,11 @@ class _IPv4Constants:
IPv4Network('255.255.255.255/32'),
]
+ _private_networks_exceptions = [
+ IPv4Network('192.0.0.9/32'),
+ IPv4Network('192.0.0.10/32'),
+ ]
+
_reserved_network = IPv4Network('240.0.0.0/4')
_unspecified_address = IPv4Address('0.0.0.0')
@@ -1995,23 +2025,42 @@ class IPv6Address(_BaseV6, _BaseAddress)
@property
@functools.lru_cache()
def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per
- iana-ipv6-special-registry.
-
- """
- return any(self in net for net in self._constants._private_networks)
+ """``True`` if the address is defined as not globally reachable by
+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
+ (for IPv6) with the following exceptions:
+
+ * ``is_private`` is ``False`` for ``100.64.0.0/10``
+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
+
+ address.is_private == address.ipv4_mapped.is_private
+
+ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10``
+ IPv4 range where they are both ``False``.
+ """
+ ipv4_mapped = self.ipv4_mapped
+ if ipv4_mapped is not None:
+ return ipv4_mapped.is_private
+ return (
+ any(self in net for net in self._constants._private_networks)
+ and all(self not in net for net in self._constants._private_networks_exceptions)
+ )
@property
def is_global(self):
- """Test if this address is allocated for public networks.
+ """``True`` if the address is defined as globally reachable by
+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_
+ (for IPv6) with the following exception:
+
+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the
+ semantics of the underlying IPv4 addresses and the following condition holds
+ (see :attr:`IPv6Address.ipv4_mapped`)::
- Returns:
- A boolean, true if the address is not reserved per
- iana-ipv6-special-registry.
+ address.is_global == address.ipv4_mapped.is_global
+ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10``
+ IPv4 range where they are both ``False``.
"""
return not self.is_private
@@ -2252,19 +2301,31 @@ class _IPv6Constants:
_multicast_network = IPv6Network('ff00::/8')
+ # Not globally reachable address blocks listed on
+ # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
_private_networks = [
IPv6Network('::1/128'),
IPv6Network('::/128'),
IPv6Network('::ffff:0:0/96'),
+ IPv6Network('64:ff9b:1::/48'),
IPv6Network('100::/64'),
IPv6Network('2001::/23'),
- IPv6Network('2001:2::/48'),
IPv6Network('2001:db8::/32'),
- IPv6Network('2001:10::/28'),
+ # IANA says N/A, let's consider it not globally reachable to be safe
+ IPv6Network('2002::/16'),
IPv6Network('fc00::/7'),
IPv6Network('fe80::/10'),
]
+ _private_networks_exceptions = [
+ IPv6Network('2001:1::1/128'),
+ IPv6Network('2001:1::2/128'),
+ IPv6Network('2001:3::/32'),
+ IPv6Network('2001:4:112::/48'),
+ IPv6Network('2001:20::/28'),
+ IPv6Network('2001:30::/28'),
+ ]
+
_reserved_networks = [
IPv6Network('::/8'), IPv6Network('100::/8'),
IPv6Network('200::/7'), IPv6Network('400::/6'),
--- a/Lib/test/test_ipaddress.py
+++ b/Lib/test/test_ipaddress.py
@@ -2263,6 +2263,10 @@ class IpaddrUnitTest(unittest.TestCase):
self.assertEqual(True, ipaddress.ip_address(
'172.31.255.255').is_private)
self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private)
+ self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global)
+ self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global)
+ self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global)
+ self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global)
self.assertEqual(True,
ipaddress.ip_address('169.254.100.200').is_link_local)
@@ -2278,6 +2282,40 @@ class IpaddrUnitTest(unittest.TestCase):
self.assertEqual(False, ipaddress.ip_address('128.0.0.0').is_loopback)
self.assertEqual(True, ipaddress.ip_network('0.0.0.0').is_unspecified)
+ def testPrivateNetworks(self):
+ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/0").is_private)
+ self.assertEqual(False, ipaddress.ip_network("1.0.0.0/8").is_private)
+
+ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/8").is_private)
+ self.assertEqual(True, ipaddress.ip_network("10.0.0.0/8").is_private)
+ self.assertEqual(True, ipaddress.ip_network("127.0.0.0/8").is_private)
+ self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private)
+ self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private)
+ self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private)
+ self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private)
+ self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private)
+ self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private)
+ self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private)
+ self.assertEqual(True, ipaddress.ip_network("198.18.0.0/15").is_private)
+ self.assertEqual(True, ipaddress.ip_network("198.51.100.0/24").is_private)
+ self.assertEqual(True, ipaddress.ip_network("203.0.113.0/24").is_private)
+ self.assertEqual(True, ipaddress.ip_network("240.0.0.0/4").is_private)
+ self.assertEqual(True, ipaddress.ip_network("255.255.255.255/32").is_private)
+
+ self.assertEqual(False, ipaddress.ip_network("::/0").is_private)
+ self.assertEqual(False, ipaddress.ip_network("::ff/128").is_private)
+
+ self.assertEqual(True, ipaddress.ip_network("::1/128").is_private)
+ self.assertEqual(True, ipaddress.ip_network("::/128").is_private)
+ self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private)
+ self.assertEqual(True, ipaddress.ip_network("100::/64").is_private)
+ self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private)
+ self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private)
+ self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private)
+ self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private)
+ self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private)
+ self.assertEqual(True, ipaddress.ip_network("fe80::/10").is_private)
+
def testReservedIpv6(self):
self.assertEqual(True, ipaddress.ip_network('ffff::').is_multicast)
@@ -2351,6 +2389,20 @@ class IpaddrUnitTest(unittest.TestCase):
self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified)
self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified)
+ self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global)
+ self.assertFalse(ipaddress.ip_address('2001::').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:1::1').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:1::2').is_global)
+ self.assertFalse(ipaddress.ip_address('2001:2::').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:3::').is_global)
+ self.assertFalse(ipaddress.ip_address('2001:4::').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global)
+ self.assertFalse(ipaddress.ip_address('2001:10::').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:20::').is_global)
+ self.assertTrue(ipaddress.ip_address('2001:30::').is_global)
+ self.assertFalse(ipaddress.ip_address('2001:40::').is_global)
+ self.assertFalse(ipaddress.ip_address('2002::').is_global)
+
# some generic IETF reserved addresses
self.assertEqual(True, ipaddress.ip_address('100::').is_reserved)
self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved)
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst
@@ -0,0 +1,9 @@
+Fixed various false positives and false negatives in
+
+* :attr:`ipaddress.IPv4Address.is_private` (see these docs for details)
+* :attr:`ipaddress.IPv4Address.is_global`
+* :attr:`ipaddress.IPv6Address.is_private`
+* :attr:`ipaddress.IPv6Address.is_global`
+
+Also in the corresponding :class:`ipaddress.IPv4Network` and :class:`ipaddress.IPv6Network`
+attributes.

View File

@ -0,0 +1,125 @@
From 15eec9d5076b780463c3dc73afcef688651c5295 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 17 Aug 2024 16:30:52 +0300
Subject: [PATCH] gh-123067: Fix quadratic complexity in parsing "-quoted
cookie values with backslashes (GH-123075)
This fixes CVE-2024-7592.
(cherry picked from commit 44e458357fca05ca0ae2658d62c8c595b048b5ef)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/http/cookies.py | 34 ++------
Lib/test/test_http_cookies.py | 38 ++++++++++
Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 1
3 files changed, 47 insertions(+), 26 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -184,8 +184,13 @@ def _quote(str):
return '"' + str.translate(_Translator) + '"'
-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
-_QuotePatt = re.compile(r"[\\].")
+_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub
+
+def _unquote_replace(m):
+ if m[1]:
+ return chr(int(m[1], 8))
+ else:
+ return m[2]
def _unquote(str):
# If there aren't any doublequotes,
@@ -205,30 +210,7 @@ def _unquote(str):
# \012 --> \n
# \" --> "
#
- i = 0
- n = len(str)
- res = []
- while 0 <= i < n:
- o_match = _OctalPatt.search(str, i)
- q_match = _QuotePatt.search(str, i)
- if not o_match and not q_match: # Neither matched
- res.append(str[i:])
- break
- # else:
- j = k = -1
- if o_match:
- j = o_match.start(0)
- if q_match:
- k = q_match.start(0)
- if q_match and (not o_match or k < j): # QuotePatt matched
- res.append(str[i:k])
- res.append(str[k+1])
- i = k + 2
- else: # OctalPatt matched
- res.append(str[i:j])
- res.append(chr(int(str[j+1:j+4], 8)))
- i = j + 4
- return _nulljoin(res)
+ return _unquote_sub(_unquote_replace, str)
# The _getdate() routine is used to set the expiration time in the cookie's HTTP
# header. By default, _getdate() returns the current time in the appropriate
--- a/Lib/test/test_http_cookies.py
+++ b/Lib/test/test_http_cookies.py
@@ -5,6 +5,7 @@ from test.support import run_unittest, r
import unittest
from http import cookies
import pickle
+from test import support
class CookieTests(unittest.TestCase):
@@ -58,6 +59,43 @@ class CookieTests(unittest.TestCase):
for k, v in sorted(case['dict'].items()):
self.assertEqual(C[k].value, v)
+ def test_unquote(self):
+ cases = [
+ (r'a="b=\""', 'b="'),
+ (r'a="b=\\"', 'b=\\'),
+ (r'a="b=\="', 'b=='),
+ (r'a="b=\n"', 'b=n'),
+ (r'a="b=\042"', 'b="'),
+ (r'a="b=\134"', 'b=\\'),
+ (r'a="b=\377"', 'b=\xff'),
+ (r'a="b=\400"', 'b=400'),
+ (r'a="b=\42"', 'b=42'),
+ (r'a="b=\\042"', 'b=\\042'),
+ (r'a="b=\\134"', 'b=\\134'),
+ (r'a="b=\\\""', 'b=\\"'),
+ (r'a="b=\\\042"', 'b=\\"'),
+ (r'a="b=\134\""', 'b=\\"'),
+ (r'a="b=\134\042"', 'b=\\"'),
+ ]
+ for encoded, decoded in cases:
+ with self.subTest(encoded):
+ C = cookies.SimpleCookie()
+ C.load(encoded)
+ self.assertEqual(C['a'].value, decoded)
+
+ @support.requires_resource('cpu')
+ def test_unquote_large(self):
+ n = 10**6
+ for encoded in r'\\', r'\134':
+ with self.subTest(encoded):
+ data = 'a="b=' + encoded*n + ';"'
+ C = cookies.SimpleCookie()
+ C.load(data)
+ value = C['a'].value
+ self.assertEqual(value[:3], 'b=\\')
+ self.assertEqual(value[-2:], '\\;')
+ self.assertEqual(len(value), n + 3)
+
def test_load(self):
C = cookies.SimpleCookie()
C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme')
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
@@ -0,0 +1 @@
+Fix quadratic complexity in parsing ``"``-quoted cookie values with backslashes by :mod:`http.cookies`.

View File

@ -0,0 +1,339 @@
From f9ddc53ea850fb02d640a9b3263756d43fb6d868 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Wed, 31 Jul 2024 00:19:48 +0200
Subject: [PATCH] [3.9] gh-121650: Encode newlines in headers, and verify
headers are sound (GH-122233)
GH-GH- Encode header parts that contain newlines
Per RFC 2047:
> [...] these encoding schemes allow the
> encoding of arbitrary octet values, mail readers that implement this
> decoding should also ensure that display of the decoded data on the
> recipient's terminal will not cause unwanted side-effects
It seems that the "quoted-word" scheme is a valid way to include
a newline character in a header value, just like we already allow
undecodable bytes or control characters.
They do need to be properly quoted when serialized to text, though.
GH-GH- Verify that email headers are well-formed
This should fail for custom fold() implementations that aren't careful
about newlines.
(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384)
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Bas Bloemsaat <bas@bloemsaat.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Doc/library/email.errors.rst | 6
Doc/library/email.policy.rst | 18 ++
Doc/whatsnew/3.9.rst | 12 +
Lib/email/_header_value_parser.py | 12 +
Lib/email/_policybase.py | 8 +
Lib/email/errors.py | 4
Lib/email/generator.py | 13 +-
Lib/test/test_email/test_generator.py | 62 ++++++++++
Lib/test/test_email/test_policy.py | 26 ++++
Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst | 5
10 files changed, 162 insertions(+), 4 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -59,6 +59,12 @@ The following exception classes are defi
:class:`~email.mime.image.MIMEImage`).
+.. exception:: HeaderWriteError()
+
+ Raised when an error occurs when the :mod:`~email.generator` outputs
+ headers.
+
+
Here is the list of the defects that the :class:`~email.parser.FeedParser`
can find while parsing messages. Note that the defects are added to the message
where the problem was found, so for example, if a message nested inside a
--- a/Doc/library/email.policy.rst
+++ b/Doc/library/email.policy.rst
@@ -229,6 +229,24 @@ added matters. To illustrate::
.. versionadded:: 3.6
+
+ .. attribute:: verify_generated_headers
+
+ If ``True`` (the default), the generator will raise
+ :exc:`~email.errors.HeaderWriteError` instead of writing a header
+ that is improperly folded or delimited, such that it would
+ be parsed as multiple headers or joined with adjacent data.
+ Such headers can be generated by custom header classes or bugs
+ in the ``email`` module.
+
+ As it's a security feature, this defaults to ``True`` even in the
+ :class:`~email.policy.Compat32` policy.
+ For backwards compatible, but unsafe, behavior, it must be set to
+ ``False`` explicitly.
+
+ .. versionadded:: 3.11.10
+
+
The following :class:`Policy` method is intended to be called by code using
the email library to create policy instances with custom settings:
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -1625,3 +1625,15 @@ ipaddress
* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``,
``IPv6Address``, ``IPv4Network`` and ``IPv6Network``.
+
+email
+-----
+
+* Headers with embedded newlines are now quoted on output.
+
+ The :mod:`~email.generator` will now refuse to serialize (write) headers
+ that are improperly folded or delimited, such that they would be parsed as
+ multiple headers or joined with adjacent data.
+ If you need to turn this safety feature off,
+ set :attr:`~email.policy.Policy.verify_generated_headers`.
+ (Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.)
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -92,6 +92,8 @@ TOKEN_ENDS = TSPECIALS | WSP
ASPECIALS = TSPECIALS | set("*'%")
ATTRIBUTE_ENDS = ASPECIALS | WSP
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+NLSET = {'\n', '\r'}
+SPECIALSNL = SPECIALS | NLSET
def quote_string(value):
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
@@ -2778,9 +2780,13 @@ def _refold_parse_tree(parse_tree, *, po
wrap_as_ew_blocked -= 1
continue
tstr = str(part)
- if part.token_type == 'ptext' and set(tstr) & SPECIALS:
- # Encode if tstr contains special characters.
- want_encoding = True
+ if not want_encoding:
+ if part.token_type == 'ptext':
+ # Encode if tstr contains special characters.
+ want_encoding = not SPECIALSNL.isdisjoint(tstr)
+ else:
+ # Encode if tstr contains newlines.
+ want_encoding = not NLSET.isdisjoint(tstr)
try:
tstr.encode(encoding)
charset = encoding
--- a/Lib/email/_policybase.py
+++ b/Lib/email/_policybase.py
@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.
message_factory -- the class to use to create new message objects.
If the value is None, the default is Message.
+ verify_generated_headers
+ -- if true, the generator verifies that each header
+ they are properly folded, so that a parser won't
+ treat it as multiple headers, start-of-body, or
+ part of another header.
+ This is a check against custom Header & fold()
+ implementations.
"""
raise_on_defect = False
@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.
max_line_length = 78
mangle_from_ = False
message_factory = None
+ verify_generated_headers = True
def handle_defect(self, obj, defect):
"""Based on policy, either raise defect or call register_defect.
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -29,6 +29,10 @@ class CharsetError(MessageError):
"""An illegal charset was given."""
+class HeaderWriteError(MessageError):
+ """Error while writing headers."""
+
+
# These are parsing defects which the parser was able to work around.
class MessageDefect(ValueError):
"""Base class for a message defect."""
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -14,12 +14,14 @@ import random
from copy import deepcopy
from io import StringIO, BytesIO
from email.utils import _has_surrogates
+from email.errors import HeaderWriteError
UNDERSCORE = '_'
NL = '\n' # XXX: no longer used by the code below.
NLCRE = re.compile(r'\r\n|\r|\n')
fcre = re.compile(r'^From ', re.MULTILINE)
+NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
@@ -223,7 +225,16 @@ class Generator:
def _write_headers(self, msg):
for h, v in msg.raw_items():
- self.write(self.policy.fold(h, v))
+ folded = self.policy.fold(h, v)
+ if self.policy.verify_generated_headers:
+ linesep = self.policy.linesep
+ if not folded.endswith(self.policy.linesep):
+ raise HeaderWriteError(
+ f'folded header does not end with {linesep!r}: {folded!r}')
+ if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)):
+ raise HeaderWriteError(
+ f'folded header contains newline: {folded!r}')
+ self.write(folded)
# A blank line always separates headers from body
self.write(self._NL)
--- a/Lib/test/test_email/test_generator.py
+++ b/Lib/test/test_email/test_generator.py
@@ -6,6 +6,7 @@ from email.message import EmailMessage
from email.generator import Generator, BytesGenerator
from email.headerregistry import Address
from email import policy
+import email.errors
from test.test_email import TestEmailBase, parameterize
@@ -216,6 +217,44 @@ class TestGeneratorBase:
g.flatten(msg)
self.assertEqual(s.getvalue(), self.typ(expected))
+ def test_keep_encoded_newlines(self):
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
+ To: nobody
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
+
+ None
+ """)))
+ expected = textwrap.dedent("""\
+ To: nobody
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
+
+ None
+ """)
+ s = self.ioclass()
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=80))
+ g.flatten(msg)
+ self.assertEqual(s.getvalue(), self.typ(expected))
+
+ def test_keep_long_encoded_newlines(self):
+ msg = self.msgmaker(self.typ(textwrap.dedent("""\
+ To: nobody
+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com
+
+ None
+ """)))
+ expected = textwrap.dedent("""\
+ To: nobody
+ Subject: Bad subject
+ =?utf-8?q?=0A?=Bcc:
+ injection@example.com
+
+ None
+ """)
+ s = self.ioclass()
+ g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
+ g.flatten(msg)
+ self.assertEqual(s.getvalue(), self.typ(expected))
+
class TestGenerator(TestGeneratorBase, TestEmailBase):
@@ -224,6 +263,29 @@ class TestGenerator(TestGeneratorBase, T
ioclass = io.StringIO
typ = str
+ def test_verify_generated_headers(self):
+ """gh-121650: by default the generator prevents header injection"""
+ class LiteralHeader(str):
+ name = 'Header'
+ def fold(self, **kwargs):
+ return self
+
+ for text in (
+ 'Value\r\nBad Injection\r\n',
+ 'NoNewLine'
+ ):
+ with self.subTest(text=text):
+ message = message_from_string(
+ "Header: Value\r\n\r\nBody",
+ policy=self.policy,
+ )
+
+ del message['Header']
+ message['Header'] = LiteralHeader(text)
+
+ with self.assertRaises(email.errors.HeaderWriteError):
+ message.as_string()
+
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
--- a/Lib/test/test_email/test_policy.py
+++ b/Lib/test/test_email/test_policy.py
@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase):
'raise_on_defect': False,
'mangle_from_': True,
'message_factory': None,
+ 'verify_generated_headers': True,
}
# These default values are the ones set on email.policy.default.
# If any of these defaults change, the docs must be updated.
@@ -277,6 +278,31 @@ class PolicyAPITests(unittest.TestCase):
with self.assertRaises(email.errors.HeaderParseError):
policy.fold("Subject", subject)
+ def test_verify_generated_headers(self):
+ """Turning protection off allows header injection"""
+ policy = email.policy.default.clone(verify_generated_headers=False)
+ for text in (
+ 'Header: Value\r\nBad: Injection\r\n',
+ 'Header: NoNewLine'
+ ):
+ with self.subTest(text=text):
+ message = email.message_from_string(
+ "Header: Value\r\n\r\nBody",
+ policy=policy,
+ )
+ class LiteralHeader(str):
+ name = 'Header'
+ def fold(self, **kwargs):
+ return self
+
+ del message['Header']
+ message['Header'] = LiteralHeader(text)
+
+ self.assertEqual(
+ message.as_string(),
+ f"{text}\nBody",
+ )
+
# XXX: Need subclassing tests.
# For adding subclassed objects, make sure the usual rules apply (subclass
# wins), but that the order still works (right overrides left).
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst
@@ -0,0 +1,5 @@
+:mod:`email` headers with embedded newlines are now quoted on output. The
+:mod:`~email.generator` will now refuse to serialize (write) headers that
+are unsafely folded or delimited; see
+:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas
+Bloemsaat and Petr Viktorin in :gh:`121650`.)

View File

@ -0,0 +1,136 @@
From dcb320a0c85713c5dfe89a83d6eb295ad1511be8 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Tue, 27 Aug 2024 17:10:30 -0400
Subject: [PATCH] [3.8] [3.9] [3.11] gh-123270: Replaced SanitizedNames with a
more surgical fix. (GH-123354)
Applies changes from zipp 3.20.1 and jaraco/zippGH-124
(cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6)
(cherry picked from commit 17b77bb41409259bad1cd6c74761c18b6ab1e860)
(cherry picked from commit 66d3383)
Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
---
Lib/test/test_zipfile.py | 75 ++++++++++
Lib/zipfile.py | 9 -
Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst | 3
3 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -3054,6 +3054,81 @@ class TestPath(unittest.TestCase):
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
zipfile.CompleteDirs._implied_dirs(data)
+ def test_malformed_paths(self):
+ """
+ Path should handle malformed paths gracefully.
+
+ Paths with leading slashes are not visible.
+
+ Paths with dots are treated like regular files.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr("../parent.txt", b"content")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ assert list(map(str, root.iterdir())) == ['../']
+ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
+
+ def test_unsupported_names(self):
+ """
+ Path segments with special characters are readable.
+
+ On some platforms or file systems, characters like
+ ``:`` and ``?`` are not allowed, but they are valid
+ in the zip file.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr("path?", b"content")
+ zf.writestr("V: NMS.flac", b"fLaC...")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ contents = root.iterdir()
+ assert next(contents).name == 'path?'
+ assert next(contents).name == 'V: NMS.flac'
+ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
+
+ def test_backslash_not_separator(self):
+ """
+ In a zip file, backslashes are not separators.
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
+ zf.filename = ''
+ root = zipfile.Path(zf)
+ (first,) = root.iterdir()
+ assert not first.is_dir()
+ assert first.name == 'foo\\bar'
+
+
+class DirtyZipInfo(zipfile.ZipInfo):
+ """
+ Bypass name sanitization.
+ """
+
+ def __init__(self, filename, *args, **kwargs):
+ super().__init__(filename, *args, **kwargs)
+ self.filename = filename
+
+ @classmethod
+ def for_name(cls, name, archive):
+ """
+ Construct the same way that ZipFile.writestr does.
+
+ TODO: extract this functionality and re-use
+ """
+ self = cls(filename=name, date_time=time.localtime(time.time())[:6])
+ self.compress_type = archive.compression
+ self.compress_level = archive.compresslevel
+ if self.filename.endswith('/'): # pragma: no cover
+ self.external_attr = 0o40775 << 16 # drwxrwxr-x
+ self.external_attr |= 0x10 # MS-DOS directory flag
+ else:
+ self.external_attr = 0o600 << 16 # ?rw-------
+ return self
+
if __name__ == "__main__":
unittest.main()
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -2146,7 +2146,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
- posixpath.sep, generate all elements of that path
+ posixpath.sep, generate all elements of that path.
>>> list(_ancestry('b/d'))
['b/d', 'b']
@@ -2158,9 +2158,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]
+
+ Multiple separators are treated like a single.
+
+ >>> list(_ancestry('//b//d///f//'))
+ ['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
- while path and path != posixpath.sep:
+ while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
@@ -0,0 +1,3 @@
+Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
+causing infinite loops (gh-122905) without breaking contents using
+legitimate characters.

View File

@ -0,0 +1,282 @@
From ae0d64cb185900712c40a65d7d8aa118f9903d57 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 1 Nov 2024 14:11:47 +0100
Subject: [PATCH] [3.11] gh-124651: Quote template strings in `venv` activation
scripts (GH-124712) (GH-126185) (#126269)
(cherry picked from commit ae961ae94bf19c8f8c7fbea3d1c25cc55ce8ae97)
---
Lib/test/test_venv.py | 81 ++++++++++
Lib/venv/__init__.py | 42 ++++-
Lib/venv/scripts/common/activate | 6
Lib/venv/scripts/nt/activate.bat | 4
Lib/venv/scripts/posix/activate.csh | 6
Lib/venv/scripts/posix/activate.fish | 6
Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst | 1
7 files changed, 130 insertions(+), 16 deletions(-)
create mode 100644 Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst
--- a/Lib/test/test_venv.py
+++ b/Lib/test/test_venv.py
@@ -14,6 +14,7 @@ import struct
import subprocess
import sys
import tempfile
+import shlex
from test.support import (captured_stdout, captured_stderr, requires_zlib,
can_symlink, EnvironmentVarGuard, rmtree,
import_module,
@@ -85,6 +86,10 @@ class BaseTest(unittest.TestCase):
result = f.read()
return result
+ def assertEndsWith(self, string, tail):
+ if not string.endswith(tail):
+ self.fail(f"String {string!r} does not end with {tail!r}")
+
class BasicTest(BaseTest):
"""Test venv module functionality."""
@@ -342,6 +347,82 @@ class BasicTest(BaseTest):
'import sys; print(sys.executable)'])
self.assertEqual(out.strip(), envpy.encode())
+ # gh-124651: test quoted strings
+ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows')
+ def test_special_chars_bash(self):
+ """
+ Test that the template strings are quoted properly (bash)
+ """
+ rmtree(self.env_dir)
+ bash = shutil.which('bash')
+ if bash is None:
+ self.skipTest('bash required for this test')
+ env_name = '"\';&&$e|\'"'
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate')
+ test_script = os.path.join(self.env_dir, 'test_special_chars.sh')
+ with open(test_script, "w") as f:
+ f.write(f'source {shlex.quote(activate)}\n'
+ 'python -c \'import sys; print(sys.executable)\'\n'
+ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n'
+ 'deactivate\n')
+ out, err = check_output([bash, test_script])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
+ # gh-124651: test quoted strings
+ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows')
+ def test_special_chars_csh(self):
+ """
+ Test that the template strings are quoted properly (csh)
+ """
+ rmtree(self.env_dir)
+ csh = shutil.which('tcsh') or shutil.which('csh')
+ if csh is None:
+ self.skipTest('csh required for this test')
+ env_name = '"\';&&$e|\'"'
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate.csh')
+ test_script = os.path.join(self.env_dir, 'test_special_chars.csh')
+ with open(test_script, "w") as f:
+ f.write(f'source {shlex.quote(activate)}\n'
+ 'python -c \'import sys; print(sys.executable)\'\n'
+ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n'
+ 'deactivate\n')
+ out, err = check_output([csh, test_script])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
+ # gh-124651: test quoted strings on Windows
+ @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows')
+ def test_special_chars_windows(self):
+ """
+ Test that the template strings are quoted properly on Windows
+ """
+ rmtree(self.env_dir)
+ env_name = "'&&^$e"
+ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name)
+ builder = venv.EnvBuilder(clear=True)
+ builder.create(env_dir)
+ activate = os.path.join(env_dir, self.bindir, 'activate.bat')
+ test_batch = os.path.join(self.env_dir, 'test_special_chars.bat')
+ with open(test_batch, "w") as f:
+ f.write('@echo off\n'
+ f'"{activate}" & '
+ f'{self.exe} -c "import sys; print(sys.executable)" & '
+ f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & '
+ 'deactivate')
+ out, err = check_output([test_batch])
+ lines = out.splitlines()
+ self.assertTrue(env_name.encode() in lines[0])
+ self.assertEndsWith(lines[1], env_name.encode())
+
@unittest.skipUnless(os.name == 'nt', 'only relevant on Windows')
def test_unicode_in_batch_file(self):
"""
--- a/Lib/venv/__init__.py
+++ b/Lib/venv/__init__.py
@@ -11,6 +11,7 @@ import subprocess
import sys
import sysconfig
import types
+import shlex
CORE_VENV_DEPS = ('pip', 'setuptools')
@@ -348,11 +349,41 @@ class EnvBuilder:
:param context: The information for the environment creation request
being processed.
"""
- text = text.replace('__VENV_DIR__', context.env_dir)
- text = text.replace('__VENV_NAME__', context.env_name)
- text = text.replace('__VENV_PROMPT__', context.prompt)
- text = text.replace('__VENV_BIN_NAME__', context.bin_name)
- text = text.replace('__VENV_PYTHON__', context.env_exe)
+ replacements = {
+ '__VENV_DIR__': context.env_dir,
+ '__VENV_NAME__': context.env_name,
+ '__VENV_PROMPT__': context.prompt,
+ '__VENV_BIN_NAME__': context.bin_name,
+ '__VENV_PYTHON__': context.env_exe,
+ }
+
+ def quote_ps1(s):
+ """
+ This should satisfy PowerShell quoting rules [1], unless the quoted
+ string is passed directly to Windows native commands [2].
+ [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules
+ [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters
+ """
+ s = s.replace("'", "''")
+ return f"'{s}'"
+
+ def quote_bat(s):
+ return s
+
+ # gh-124651: need to quote the template strings properly
+ quote = shlex.quote
+ script_path = context.script_path
+ if script_path.endswith('.ps1'):
+ quote = quote_ps1
+ elif script_path.endswith('.bat'):
+ quote = quote_bat
+ else:
+ # fallbacks to POSIX shell compliant quote
+ quote = shlex.quote
+
+ replacements = {key: quote(s) for key, s in replacements.items()}
+ for key, quoted in replacements.items():
+ text = text.replace(key, quoted)
return text
def install_scripts(self, context, path):
@@ -392,6 +423,7 @@ class EnvBuilder:
with open(srcfile, 'rb') as f:
data = f.read()
if not srcfile.endswith(('.exe', '.pdb')):
+ context.script_path = srcfile
try:
data = data.decode('utf-8')
data = self.replace_variables(data, context)
--- a/Lib/venv/scripts/common/activate
+++ b/Lib/venv/scripts/common/activate
@@ -37,11 +37,11 @@ deactivate () {
# unset irrelevant variables
deactivate nondestructive
-VIRTUAL_ENV="__VENV_DIR__"
+VIRTUAL_ENV=__VENV_DIR__
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
-PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH"
+PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH"
export PATH
# unset PYTHONHOME if set
@@ -54,7 +54,7 @@ fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
- PS1="__VENV_PROMPT__${PS1:-}"
+ PS1=__VENV_PROMPT__"${PS1:-}"
export PS1
fi
--- a/Lib/venv/scripts/nt/activate.bat
+++ b/Lib/venv/scripts/nt/activate.bat
@@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE (
"%SystemRoot%\System32\chcp.com" 65001 > nul
)
-set VIRTUAL_ENV=__VENV_DIR__
+set "VIRTUAL_ENV=__VENV_DIR__"
if not defined PROMPT set PROMPT=$P$G
@@ -24,7 +24,7 @@ set PYTHONHOME=
if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH%
if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH%
-set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%
+set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%"
:END
if defined _OLD_CODEPAGE (
--- a/Lib/venv/scripts/posix/activate.csh
+++ b/Lib/venv/scripts/posix/activate.csh
@@ -8,16 +8,16 @@ alias deactivate 'test $?_OLD_VIRTUAL_PA
# Unset irrelevant variables.
deactivate nondestructive
-setenv VIRTUAL_ENV "__VENV_DIR__"
+setenv VIRTUAL_ENV __VENV_DIR__
set _OLD_VIRTUAL_PATH="$PATH"
-setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH"
+setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH"
set _OLD_VIRTUAL_PROMPT="$prompt"
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
- set prompt = "__VENV_PROMPT__$prompt"
+ set prompt = __VENV_PROMPT__"$prompt"
endif
alias pydoc python -m pydoc
--- a/Lib/venv/scripts/posix/activate.fish
+++ b/Lib/venv/scripts/posix/activate.fish
@@ -29,10 +29,10 @@ end
# Unset irrelevant variables.
deactivate nondestructive
-set -gx VIRTUAL_ENV "__VENV_DIR__"
+set -gx VIRTUAL_ENV __VENV_DIR__
set -gx _OLD_VIRTUAL_PATH $PATH
-set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH
+set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH
# Unset PYTHONHOME if set.
if set -q PYTHONHOME
@@ -52,7 +52,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
set -l old_status $status
# Output the venv prompt; color taken from the blue of the Python logo.
- printf "%s%s%s" (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal)
+ printf "%s%s%s" (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal)
# Restore the return status of the previous command.
echo "exit $old_status" | .
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-09-28-02-03-04.gh-issue-124651.bLBGtH.rst
@@ -0,0 +1 @@
+Properly quote template strings in :mod:`venv` activation scripts.

3
Python-3.9.19.tar.xz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4892cd1618f6458cb851208c030df1482779609d0f3939991bd38184f8c679e
size 19682840

16
Python-3.9.19.tar.xz.asc Normal file
View File

@ -0,0 +1,16 @@
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEE4/8oOcBIslwITevpsmmV4xAlBWgFAmX5uMIACgkQsmmV4xAl
BWj1tQ//T2qX0m08xWGV7az0D1sH3qjoY+4fEYrknw5uAHqZFiQecRsF27jxv6iH
gP/6GAUw+lbH+9UofhCc0NbPOklliS7gFLNqJdKYFB6JXRNxiRYKh3uVx5o2n0ES
kR3kRl77S47rtCbSMrKTh6ZoWowyIUZGFsIonk5KsLv+oELXY1AK/Im9i3/iTJ1Z
jd/e2oHWuseIxbGZAO8AEP8zOsMMIHfsL3ry8H9xhhPyQM6t5DldqLH3UVE6kq95
fs+olGO4FEKif3VDuLaHVlgtGZOUr6aDIYUmWxctPicboSb6RJAq37CCYgWykOyB
WQec0ONbU7lxt5jhemLSDRy0mEio7+nXIKsO9rDN0Wk1QMpHUl77/C5qVlzfHal7
NhPt8Yl0hBnOjzTq+di+xhAKJcdKp+zZH7/ugAbthuqhNfnkqiF68PANHrCm3gbY
myN0eSaQ9yIa/MbHW8Am9NL/nuFbxdJUL/OIKQ9kFHgD7Qid86TZF0G2vbiBH/eF
IVYoMxRZLd7eu5dIcwXSef+Ai97pODbx9y7bOCFyBO9FuFrlhPObgc7KXCeAzP+y
k5eWvZtWTvvQ+2si2iT22EPBO0D0pnhYWZKpGK5EuKuw8nasNS1yLbhDTVpARynd
8buQh3t2wPfILlQr0+JzDY8GSdQ/nIHGgx2IERdSX/v+9Yo2AvU=
=gYAl
-----END PGP SIGNATURE-----

3
Python-3.9.20.tar.xz Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b281279efd85294d2d6993e173983a57464c0133956fbbb5536ec9646beaf0c
size 19648968

16
Python-3.9.20.tar.xz.asc Normal file
View File

@ -0,0 +1,16 @@
-----BEGIN PGP SIGNATURE-----
iQIzBAABCAAdFiEE4/8oOcBIslwITevpsmmV4xAlBWgFAmbcKf0ACgkQsmmV4xAl
BWh4rg//R5E1EjsifYqhLeIyT+JnrBvbTZeEcdxPXevsgilojYmrxBUKuXXViul0
YZFaoDf6wjbHh6NMNgUpqcOH/5S/LsFZvuEcrw0jyGlMr0AMA4KLmNvQ9Wxf+wp4
mUmhymQx555nVivsdPiziNnDwubZeA870ZllYEMWP5vXw7p2LbnlZvn7A+LSKjqM
S/6xbiKYVexK3vHY/uG0xo4z24FySfvs0/PF11JfRJCxm9+bli7FmHOoFMwpOO6S
caZLok4987YWOcPIPY6h+o2sFhDqHs8POGKd8k+0KQNQs5UbEQ4t/eKgnaoATkGn
nfcAGXSjX5RSv5uXPzBUc0PulYo6EalIn1b5fu96La/FEg9GLMR/n9g75Fgm/j9L
QGYu/DSaastY/c7Ot4QVyB6pxbQKjM438yneQrjhKBILGla4Crh1k6yRCx93j/TH
hF9kiuRf7jtLIGTp0cnquELGnatmL1RhOySn/1Y+asMR+oK8d+XQab//w4VsAt7C
SIfVXg25PUgZoaiYj/qIjLK9vkcj/EZ1IacivP5qBWb3O1E8gzSV8Z9duGT8Ef3P
ch4M/pd6hefVVVfyCoazB3gwDs68O6U2BIRdYLRlet8AuKTBysQKFwOo3EcCMmJV
W20KutPnERCzt8jeJdzFd0z3po9mvxNTKDLYaABtNI6NN00LcsM=
=svjf
-----END PGP SIGNATURE-----