diff --git a/CVE-2023-27043-email-parsing-errors.patch b/CVE-2023-27043-email-parsing-errors.patch index 24e6079..b22b81b 100644 --- a/CVE-2023-27043-email-parsing-errors.patch +++ b/CVE-2023-27043-email-parsing-errors.patch @@ -1,70 +1,185 @@ --- - Doc/library/email.utils.rst | 26 +++ - Lib/email/utils.py | 63 +++++++ - Lib/test/test_email/test_email.py | 81 +++++++++- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 - 4 files changed, 164 insertions(+), 10 deletions(-) + Doc/library/email.utils.rst | 19 - + Lib/email/utils.py | 151 +++++++- + Lib/test/test_email/test_email.py | 187 +++++++++- + Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + 4 files changed, 344 insertions(+), 21 deletions(-) --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst -@@ -67,6 +67,11 @@ of the new API. +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. -+ .. versionchanged:: 3.12 -+ For security reasons, addresses that were ambiguous and could parse into -+ multiple different addresses now cause ``('', '')`` to be returned -+ instead of only one of the *potential* addresses. ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. function:: formataddr(pair, charset='utf-8') -@@ -89,7 +94,7 @@ of the new API. +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple +- :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: -+ example that gets all the recipients of a message: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses -@@ -99,6 +104,25 @@ of the new API. +@@ -99,6 +107,9 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) -+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -+ is returned in its place. Other errors in parsing the list of -+ addresses such as a fieldvalue seemingly parsing into multiple -+ addresses may result in a list containing a single empty 2-tuple -+ ``[('', '')]`` being returned rather than returning potentially -+ invalid output. -+ -+ Example malformed input parsing: -+ -+ .. doctest:: -+ -+ >>> from email.utils import getaddresses -+ >>> getaddresses(['alice@example.com ', 'me@example.com']) -+ [('', '')] -+ -+ .. versionchanged:: 3.12 -+ The 2-tuple of ``('', '')`` in the returned values when parsing -+ fails were added as to address a security issue. ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. function:: parsedate(date) --- a/Lib/email/utils.py +++ b/Lib/email/utils.py -@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'): +@@ -48,6 +48,7 @@ TICK = "'" + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): return address ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) ++ ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. ++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. ++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: -+ s = v.replace('\\(', '').replace('\\)', '') -+ if s.count('(') != s.count(')'): ++ if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + @@ -81,46 +196,32 @@ + accepted_values.append(v) + + return accepted_values -+ - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -- all = COMMASPACE.join(str(v) for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -+ -+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -+ its place. -+ -+ If the resulting list of parsed address is not the same as the number of -+ fieldvalues in the input list a parsing error has occurred. A list -+ containing a single empty 2-tuple [('', '')] is returned in its place. -+ This is done to avoid invalid output. -+ """ -+ fieldvalues = [str(v) for v in fieldvalues] -+ fieldvalues = _pre_parse_validation(fieldvalues) -+ all = COMMASPACE.join(v for v in fieldvalues) - a = _AddressList(all) -- return a.addresslist -+ result = _post_parse_validation(a.addresslist) -+ -+ n = 0 -+ for v in fieldvalues: -+ n += v.count(',') + 1 -+ -+ if len(result) != n: -+ return [('', '')] -+ -+ return result def _format_timetuple_and_zone(timetuple, zone): -@@ -209,9 +251,18 @@ def parseaddr(addr): +@@ -202,16 +318,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ + if isinstance(addr, list): + addr = addr[0] + @@ -138,104 +239,223 @@ --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py -@@ -3263,15 +3263,90 @@ Foo +@@ -16,6 +16,7 @@ from unittest.mock import patch + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.header import Header, decode_header, make_header +@@ -3263,15 +3264,137 @@ Foo [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) -+ def test_getaddresses_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.getaddresses(['alice@example.org(']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org)']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org<']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org>']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org@']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org,']), -+ [('', 'alice@example.org'), ('', 'bob@example.com')]) -+ eq(utils.getaddresses(['alice@example.org;']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org:']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org.']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org"']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org[']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org]']), -+ [('', '')]) ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') + -+ def test_parseaddr_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.parseaddr(['alice@example.org(']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org)']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org<']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org>']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org@']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org,']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org;']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org:']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org.']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org"']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org[']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org]']), -+ ('', '')) ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. ++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. ++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) + def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) -+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -+ eq(utils.getaddresses( -+ [r'Pete(A nice \) chap) ']), -+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -+ eq(utils.getaddresses( -+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -+ eq(utils.getaddresses( -+ ['John Doe ']), -+ [('John Doe (comment)', 'jdoe@machine.example')]) -+ eq(utils.getaddresses( -+ ['"Mary Smith: Personal Account" ']), -+ [('Mary Smith: Personal Account', 'smith@home.example')]) -+ eq(utils.getaddresses( -+ ['Undisclosed recipients:;']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ [r', "Giant; \"Big\" Box" ']), -+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" +@@ -3460,6 +3583,54 @@ multipart/report + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." ', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): --- /dev/null -+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -0,0 +1,4 @@ -+CVE-2023-27043: Prevent :func:`email.utils.parseaddr` -+and :func:`email.utils.getaddresses` from returning the realname portion of an -+invalid RFC2822 email header in the email address portion of the 2-tuple -+returned after being parsed by :class:`email._parseaddr.AddressList`. ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check ++if the *strict* paramater is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix. diff --git a/Revert-gh105127-left-tests.patch b/Revert-gh105127-left-tests.patch deleted file mode 100644 index a64b7bd..0000000 --- a/Revert-gh105127-left-tests.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001 -From: "Gregory P. Smith" -Date: Thu, 20 Jul 2023 20:30:52 -0700 -Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address - parsing errors ... (GH-105127)" (GH-106733) - -This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00. -Adds a regression test from the issue. - -See https://github.com/python/cpython/issues/106669.. -(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d) - -Co-authored-by: Gregory P. Smith ---- - Doc/library/email.utils.rst | 26 -- - Lib/email/utils.py | 63 ------ - Lib/test/test_email/test_email.py | 96 +--------- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5 - 4 files changed, 31 insertions(+), 159 deletions(-) - create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst - -Index: Python-3.9.17/Doc/library/email.utils.rst -=================================================================== ---- Python-3.9.17.orig/Doc/library/email.utils.rst -+++ Python-3.9.17/Doc/library/email.utils.rst -@@ -67,11 +67,6 @@ of the new API. - *email address* parts. Returns a tuple of that information, unless the parse - fails, in which case a 2-tuple of ``('', '')`` is returned. - -- .. versionchanged:: 3.12 -- For security reasons, addresses that were ambiguous and could parse into -- multiple different addresses now cause ``('', '')`` to be returned -- instead of only one of the *potential* addresses. -- - - .. function:: formataddr(pair, charset='utf-8') - -@@ -94,7 +89,7 @@ of the new API. - This method returns a list of 2-tuples of the form returned by ``parseaddr()``. - *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple -- example that gets all the recipients of a message: -+ example that gets all the recipients of a message:: - - from email.utils import getaddresses - -@@ -104,25 +99,6 @@ of the new API. - resent_ccs = msg.get_all('resent-cc', []) - all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) - -- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -- is returned in its place. Other errors in parsing the list of -- addresses such as a fieldvalue seemingly parsing into multiple -- addresses may result in a list containing a single empty 2-tuple -- ``[('', '')]`` being returned rather than returning potentially -- invalid output. -- -- Example malformed input parsing: -- -- .. doctest:: -- -- >>> from email.utils import getaddresses -- >>> getaddresses(['alice@example.com ', 'me@example.com']) -- [('', '')] -- -- .. versionchanged:: 3.12 -- The 2-tuple of ``('', '')`` in the returned values when parsing -- fails were added as to address a security issue. -- - - .. function:: parsedate(date) - -Index: Python-3.9.17/Lib/email/utils.py -=================================================================== ---- Python-3.9.17.orig/Lib/email/utils.py -+++ Python-3.9.17/Lib/email/utils.py -@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'): - return address - - --def _pre_parse_validation(email_header_fields): -- accepted_values = [] -- for v in email_header_fields: -- s = v.replace('\\(', '').replace('\\)', '') -- if s.count('(') != s.count(')'): -- v = "('', '')" -- accepted_values.append(v) -- -- return accepted_values -- -- --def _post_parse_validation(parsed_email_header_tuples): -- accepted_values = [] -- # The parser would have parsed a correctly formatted domain-literal -- # The existence of an [ after parsing indicates a parsing failure -- for v in parsed_email_header_tuples: -- if '[' in v[1]: -- v = ('', '') -- accepted_values.append(v) -- -- return accepted_values -- - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -- -- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -- its place. -- -- If the resulting list of parsed address is not the same as the number of -- fieldvalues in the input list a parsing error has occurred. A list -- containing a single empty 2-tuple [('', '')] is returned in its place. -- This is done to avoid invalid output. -- """ -- fieldvalues = [str(v) for v in fieldvalues] -- fieldvalues = _pre_parse_validation(fieldvalues) -- all = COMMASPACE.join(v for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -+ all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) -- result = _post_parse_validation(a.addresslist) -- -- n = 0 -- for v in fieldvalues: -- n += v.count(',') + 1 -- -- if len(result) != n: -- return [('', '')] -- -- return result -+ return a.addresslist - - - def _format_timetuple_and_zone(timetuple, zone): -@@ -251,18 +209,9 @@ def parseaddr(addr): - Return a tuple of realname and email address, unless the parse fails, in - which case return a 2-tuple of ('', ''). - """ -- if isinstance(addr, list): -- addr = addr[0] -- -- if not isinstance(addr, str): -- return ('', '') -- -- addr = _pre_parse_validation([addr])[0] -- addrs = _post_parse_validation(_AddressList(addr).addresslist) -- -- if not addrs or len(addrs) > 1: -- return ('', '') -- -+ addrs = _AddressList(addr).addresslist -+ if not addrs: -+ return '', '' - return addrs[0] - - -Index: Python-3.9.17/Lib/test/test_email/test_email.py -=================================================================== ---- Python-3.9.17.orig/Lib/test/test_email/test_email.py -+++ Python-3.9.17/Lib/test/test_email/test_email.py -@@ -3263,90 +3263,32 @@ Foo - [('Al Person', 'aperson@dom.ain'), - ('Bud Person', 'bperson@dom.ain')]) - -- def test_getaddresses_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.getaddresses(['alice@example.org(']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org)']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org<']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org>']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org@']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org,']), -- [('', 'alice@example.org'), ('', 'bob@example.com')]) -- eq(utils.getaddresses(['alice@example.org;']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org:']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org.']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org"']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org[']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org]']), -- [('', '')]) -- -- def test_parseaddr_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.parseaddr(['alice@example.org(']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org)']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org<']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org>']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org@']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org,']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org;']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org:']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org.']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org"']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org[']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org]']), -- ('', '')) -+ def test_getaddresses_comma_in_name(self): -+ """GH-106669 regression test.""" -+ self.assertEqual( -+ utils.getaddresses( -+ [ -+ '"Bud, Person" ', -+ 'aperson@dom.ain (Al Person)', -+ '"Mariusz Felisiak" ', -+ ] -+ ), -+ [ -+ ('Bud, Person', 'bperson@dom.ain'), -+ ('Al Person', 'aperson@dom.ain'), -+ ('Mariusz Felisiak', 'to@example.com'), -+ ], -+ ) - - def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) -- eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) -+ eq(utils.getaddresses( -+ ['[]*-- =~$']), -+ [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -- eq(utils.getaddresses( -- [r'Pete(A nice \) chap) ']), -- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -- eq(utils.getaddresses( -- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -- [('', '')]) -- eq(utils.getaddresses( -- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -- eq(utils.getaddresses( -- ['John Doe ']), -- [('John Doe (comment)', 'jdoe@machine.example')]) -- eq(utils.getaddresses( -- ['"Mary Smith: Personal Account" ']), -- [('Mary Smith: Personal Account', 'smith@home.example')]) -- eq(utils.getaddresses( -- ['Undisclosed recipients:;']), -- [('', '')]) -- eq(utils.getaddresses( -- [r', "Giant; \"Big\" Box" ']), -- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) - - def test_getaddresses_embedded_comment(self): - """Test proper handling of a nested comment""" -Index: Python-3.9.17/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -=================================================================== ---- Python-3.9.17.orig/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -+++ Python-3.9.17/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -1,3 +1,8 @@ -+Reverted the :mod:`email.utils` security improvement change released in -+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail -+to parse email addresses with a comma in the quoted name field. -+See :gh:`106669`. -+ - CVE-2023-27043: Prevent :func:`email.utils.parseaddr` - and :func:`email.utils.getaddresses` from returning the realname portion of an - invalid RFC2822 email header in the email address portion of the 2-tuple diff --git a/libexpat260.patch b/libexpat260.patch new file mode 100644 index 0000000..d1da4a6 --- /dev/null +++ b/libexpat260.patch @@ -0,0 +1,107 @@ +From f2eebf3c38eae77765247791576b437ec25ccfe2 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Sun, 11 Feb 2024 12:08:39 +0200 +Subject: [PATCH] gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 + (GH-115164) + +Feeding the parser by too small chunks defers parsing to prevent +CVE-2023-52425. Future versions of Expat may be more reactive. +(cherry picked from commit 4a08e7b3431cd32a0daf22a33421cd3035343dc4) + +Co-authored-by: Serhiy Storchaka +--- + Lib/test/test_xml_etree.py | 58 ++++++++++++------- + ...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 + + 2 files changed, 38 insertions(+), 22 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst + +Index: Python-3.9.18/Lib/test/test_xml_etree.py +=================================================================== +--- Python-3.9.18.orig/Lib/test/test_xml_etree.py ++++ Python-3.9.18/Lib/test/test_xml_etree.py +@@ -13,6 +13,7 @@ import itertools + import operator + import os + import pickle ++import pyexpat + import sys + import textwrap + import types +@@ -102,6 +103,10 @@ EXTERNAL_ENTITY_XML = """\ + &entity; + """ + ++fails_with_expat_2_6_0 = (unittest.expectedFailure ++ if pyexpat.version_info >= (2, 6, 0) else ++ lambda test: test) ++ + def checkwarnings(*filters, quiet=False): + def decorator(test): + def newtest(*args, **kwargs): +@@ -1391,28 +1396,37 @@ class XMLPullParserTest(unittest.TestCas + self.assertEqual([(action, elem.tag) for action, elem in events], + expected) + +- def test_simple_xml(self): +- for chunk_size in (None, 1, 5): +- with self.subTest(chunk_size=chunk_size): +- parser = ET.XMLPullParser() +- self.assert_event_tags(parser, []) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, []) +- self._feed(parser, +- "\n text\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'element')]) +- self._feed(parser, "texttail\n", chunk_size) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [ +- ('end', 'element'), +- ('end', 'empty-element'), +- ]) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'root')]) +- self.assertIsNone(parser.close()) ++ def test_simple_xml(self, chunk_size=None): ++ parser = ET.XMLPullParser() ++ self.assert_event_tags(parser, []) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, []) ++ self._feed(parser, ++ "\n text\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'element')]) ++ self._feed(parser, "texttail\n", chunk_size) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [ ++ ('end', 'element'), ++ ('end', 'empty-element'), ++ ]) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'root')]) ++ self.assertIsNone(parser.close()) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_1(self): ++ self.test_simple_xml(chunk_size=1) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_5(self): ++ self.test_simple_xml(chunk_size=5) ++ ++ def test_simple_xml_chunk_22(self): ++ self.test_simple_xml(chunk_size=22) + + def test_feed_while_iterating(self): + parser = ET.XMLPullParser() +Index: Python-3.9.18/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +=================================================================== +--- /dev/null ++++ Python-3.9.18/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +@@ -0,0 +1,2 @@ ++Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat ++2.6.0. diff --git a/python39.changes b/python39.changes index 04fdd82..9c812ce 100644 --- a/python39.changes +++ b/python39.changes @@ -1,3 +1,17 @@ +------------------------------------------------------------------- +Thu Feb 15 10:29:07 UTC 2024 - Daniel Garcia + +- Add upstream patch libexpat260.patch, Fix tests for XMLPullParser + with Expat 2.6.0, gh#python/cpython#115289 + +------------------------------------------------------------------- +Mon Dec 18 16:20:58 UTC 2023 - Matej Cepl + +- Refresh CVE-2023-27043-email-parsing-errors.patch to + gh#python/cpython!111116, fixing bsc#1210638 (CVE-2023-27043). +- Thus we can remove Revert-gh105127-left-tests.patch, which is + now useless. + ------------------------------------------------------------------- Wed Sep 6 06:38:27 UTC 2023 - Daniel Garcia diff --git a/python39.spec b/python39.spec index 8dd52e6..7abc7a0 100644 --- a/python39.spec +++ b/python39.spec @@ -1,7 +1,7 @@ # -# spec file +# spec file for package python39 # -# Copyright (c) 2023 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -178,9 +178,9 @@ Patch41: downport-Sphinx-features.patch # indicate the parsing error (old API), from gh#python/cpython!105127 # Patch carries a REGRESSION (gh#python/cpython#106669), so it has been also partially REVERTED Patch42: CVE-2023-27043-email-parsing-errors.patch -# PATCH-FIX-UPSTREAM Revert-gh105127-left-tests.patch bsc#1210638 mcepl@suse.com -# Partially revert previous patch -Patch43: Revert-gh105127-left-tests.patch +# PATCH-FIX-UPSTREAM libexpat260.patch gh#python/cpython#115289 +# Fix tests for XMLPullParser with Expat 2.6.0 +Patch43: libexpat260.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -420,32 +420,32 @@ other applications. %prep %setup -q -n %{tarname} -%patch02 -p1 -%patch06 -p1 -%patch07 -p1 -%patch08 -p1 -%patch09 -p1 -%patch15 -p1 -%patch25 -p1 -%patch29 -p1 -%patch32 -p1 +%patch -P 02 -p1 +%patch -P 06 -p1 +%patch -P 07 -p1 +%patch -P 08 -p1 +%patch -P 09 -p1 +%patch -P 15 -p1 +%patch -P 25 -p1 +%patch -P 29 -p1 +%patch -P 32 -p1 %if 0%{?sle_version} && 0%{?sle_version} <= 150300 -%patch33 -p1 -%patch34 -p1 +%patch -P 33 -p1 +%patch -P 34 -p1 %endif %if %{with mpdecimal} -%patch05 -p1 +%patch -P 05 -p1 %endif -%patch35 -p1 -%patch37 -p1 -%patch38 -p1 -%patch39 -p1 -%patch40 -p1 +%patch -P 35 -p1 +%patch -P 37 -p1 +%patch -P 38 -p1 +%patch -P 39 -p1 +%patch -P 40 -p1 %if 0%{?sle_version} && 0%{?sle_version} <= 150500 -%patch41 -p1 +%patch -P 41 -p1 %endif -%patch42 -p1 -%patch43 -p1 +%patch -P 42 -p1 +%patch -P 43 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac