diff --git a/CVE-2023-27043-email-parsing-errors.patch b/CVE-2023-27043-email-parsing-errors.patch index 7aec4a2..6e4cc64 100644 --- a/CVE-2023-27043-email-parsing-errors.patch +++ b/CVE-2023-27043-email-parsing-errors.patch @@ -1,74 +1,185 @@ --- - Doc/library/email.utils.rst | 26 +++ - Lib/email/utils.py | 63 +++++++ - Lib/test/test_email/test_email.py | 81 +++++++++- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 - 4 files changed, 164 insertions(+), 10 deletions(-) + Doc/library/email.utils.rst | 19 - + Lib/email/utils.py | 151 +++++++- + Lib/test/test_email/test_email.py | 187 +++++++++- + Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + 4 files changed, 344 insertions(+), 21 deletions(-) -Index: Python-3.11.4/Doc/library/email.utils.rst -=================================================================== ---- Python-3.11.4.orig/Doc/library/email.utils.rst -+++ Python-3.11.4/Doc/library/email.utils.rst -@@ -67,6 +67,11 @@ of the new API. +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. -+ .. versionchanged:: 3.12 -+ For security reasons, addresses that were ambiguous and could parse into -+ multiple different addresses now cause ``('', '')`` to be returned -+ instead of only one of the *potential* addresses. ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. function:: formataddr(pair, charset='utf-8') -@@ -89,7 +94,7 @@ of the new API. +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple +- :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: -+ example that gets all the recipients of a message: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses -@@ -99,6 +104,25 @@ of the new API. +@@ -99,6 +107,9 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) -+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -+ is returned in its place. Other errors in parsing the list of -+ addresses such as a fieldvalue seemingly parsing into multiple -+ addresses may result in a list containing a single empty 2-tuple -+ ``[('', '')]`` being returned rather than returning potentially -+ invalid output. -+ -+ Example malformed input parsing: -+ -+ .. doctest:: -+ -+ >>> from email.utils import getaddresses -+ >>> getaddresses(['alice@example.com ', 'me@example.com']) -+ [('', '')] -+ -+ .. versionchanged:: 3.12 -+ The 2-tuple of ``('', '')`` in the returned values when parsing -+ fails were added as to address a security issue. ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. function:: parsedate(date) -Index: Python-3.11.4/Lib/email/utils.py -=================================================================== ---- Python-3.11.4.orig/Lib/email/utils.py -+++ Python-3.11.4/Lib/email/utils.py -@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'): +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -48,6 +48,7 @@ TICK = "'" + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): return address ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) ++ ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. ++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. ++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: -+ s = v.replace('\\(', '').replace('\\)', '') -+ if s.count('(') != s.count(')'): ++ if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + @@ -85,46 +196,32 @@ Index: Python-3.11.4/Lib/email/utils.py + accepted_values.append(v) + + return accepted_values -+ - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -- all = COMMASPACE.join(str(v) for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -+ -+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -+ its place. -+ -+ If the resulting list of parsed address is not the same as the number of -+ fieldvalues in the input list a parsing error has occurred. A list -+ containing a single empty 2-tuple [('', '')] is returned in its place. -+ This is done to avoid invalid output. -+ """ -+ fieldvalues = [str(v) for v in fieldvalues] -+ fieldvalues = _pre_parse_validation(fieldvalues) -+ all = COMMASPACE.join(v for v in fieldvalues) - a = _AddressList(all) -- return a.addresslist -+ result = _post_parse_validation(a.addresslist) -+ -+ n = 0 -+ for v in fieldvalues: -+ n += v.count(',') + 1 -+ -+ if len(result) != n: -+ return [('', '')] -+ -+ return result def _format_timetuple_and_zone(timetuple, zone): -@@ -212,9 +254,18 @@ def parseaddr(addr): +@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ + if isinstance(addr, list): + addr = addr[0] + @@ -140,110 +237,225 @@ Index: Python-3.11.4/Lib/email/utils.py return addrs[0] -Index: Python-3.11.4/Lib/test/test_email/test_email.py -=================================================================== ---- Python-3.11.4.orig/Lib/test/test_email/test_email.py -+++ Python-3.11.4/Lib/test/test_email/test_email.py -@@ -3320,15 +3320,90 @@ Foo +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -17,6 +17,7 @@ from unittest.mock import patch + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.generator import Generator, DecodedGenerator, BytesGenerator +@@ -3321,15 +3322,137 @@ Foo [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) -+ def test_getaddresses_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.getaddresses(['alice@example.org(']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org)']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org<']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org>']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org@']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org,']), -+ [('', 'alice@example.org'), ('', 'bob@example.com')]) -+ eq(utils.getaddresses(['alice@example.org;']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org:']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org.']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org"']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org[']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org]']), -+ [('', '')]) ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') + -+ def test_parseaddr_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.parseaddr(['alice@example.org(']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org)']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org<']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org>']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org@']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org,']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org;']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org:']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org.']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org"']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org[']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org]']), -+ ('', '')) ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. ++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. ++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) + def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) -+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -+ eq(utils.getaddresses( -+ [r'Pete(A nice \) chap) ']), -+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -+ eq(utils.getaddresses( -+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -+ eq(utils.getaddresses( -+ ['John Doe ']), -+ [('John Doe (comment)', 'jdoe@machine.example')]) -+ eq(utils.getaddresses( -+ ['"Mary Smith: Personal Account" ']), -+ [('Mary Smith: Personal Account', 'smith@home.example')]) -+ eq(utils.getaddresses( -+ ['Undisclosed recipients:;']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ [r', "Giant; \"Big\" Box" ']), -+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" -Index: Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -=================================================================== +@@ -3520,6 +3643,54 @@ multipart/report + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." ', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): --- /dev/null -+++ Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -0,0 +1,4 @@ -+CVE-2023-27043: Prevent :func:`email.utils.parseaddr` -+and :func:`email.utils.getaddresses` from returning the realname portion of an -+invalid RFC2822 email header in the email address portion of the 2-tuple -+returned after being parsed by :class:`email._parseaddr.AddressList`. ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check ++if the *strict* paramater is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix. diff --git a/Revert-gh105127-left-tests.patch b/Revert-gh105127-left-tests.patch deleted file mode 100644 index 87ce40a..0000000 --- a/Revert-gh105127-left-tests.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001 -From: "Gregory P. Smith" -Date: Thu, 20 Jul 2023 20:30:52 -0700 -Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address - parsing errors ... (GH-105127)" (GH-106733) - -This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00. -Adds a regression test from the issue. - -See https://github.com/python/cpython/issues/106669.. -(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d) - -Co-authored-by: Gregory P. Smith ---- - Doc/library/email.utils.rst | 26 -- - Lib/email/utils.py | 63 ------ - Lib/test/test_email/test_email.py | 96 +--------- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5 - 4 files changed, 31 insertions(+), 159 deletions(-) - create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst - -Index: Python-3.11.4/Doc/library/email.utils.rst -=================================================================== ---- Python-3.11.4.orig/Doc/library/email.utils.rst -+++ Python-3.11.4/Doc/library/email.utils.rst -@@ -67,11 +67,6 @@ of the new API. - *email address* parts. Returns a tuple of that information, unless the parse - fails, in which case a 2-tuple of ``('', '')`` is returned. - -- .. versionchanged:: 3.12 -- For security reasons, addresses that were ambiguous and could parse into -- multiple different addresses now cause ``('', '')`` to be returned -- instead of only one of the *potential* addresses. -- - - .. function:: formataddr(pair, charset='utf-8') - -@@ -94,7 +89,7 @@ of the new API. - This method returns a list of 2-tuples of the form returned by ``parseaddr()``. - *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple -- example that gets all the recipients of a message: -+ example that gets all the recipients of a message:: - - from email.utils import getaddresses - -@@ -104,25 +99,6 @@ of the new API. - resent_ccs = msg.get_all('resent-cc', []) - all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) - -- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -- is returned in its place. Other errors in parsing the list of -- addresses such as a fieldvalue seemingly parsing into multiple -- addresses may result in a list containing a single empty 2-tuple -- ``[('', '')]`` being returned rather than returning potentially -- invalid output. -- -- Example malformed input parsing: -- -- .. doctest:: -- -- >>> from email.utils import getaddresses -- >>> getaddresses(['alice@example.com ', 'me@example.com']) -- [('', '')] -- -- .. versionchanged:: 3.12 -- The 2-tuple of ``('', '')`` in the returned values when parsing -- fails were added as to address a security issue. -- - - .. function:: parsedate(date) - -Index: Python-3.11.4/Lib/email/utils.py -=================================================================== ---- Python-3.11.4.orig/Lib/email/utils.py -+++ Python-3.11.4/Lib/email/utils.py -@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'): - return address - - --def _pre_parse_validation(email_header_fields): -- accepted_values = [] -- for v in email_header_fields: -- s = v.replace('\\(', '').replace('\\)', '') -- if s.count('(') != s.count(')'): -- v = "('', '')" -- accepted_values.append(v) -- -- return accepted_values -- -- --def _post_parse_validation(parsed_email_header_tuples): -- accepted_values = [] -- # The parser would have parsed a correctly formatted domain-literal -- # The existence of an [ after parsing indicates a parsing failure -- for v in parsed_email_header_tuples: -- if '[' in v[1]: -- v = ('', '') -- accepted_values.append(v) -- -- return accepted_values -- - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -- -- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -- its place. -- -- If the resulting list of parsed address is not the same as the number of -- fieldvalues in the input list a parsing error has occurred. A list -- containing a single empty 2-tuple [('', '')] is returned in its place. -- This is done to avoid invalid output. -- """ -- fieldvalues = [str(v) for v in fieldvalues] -- fieldvalues = _pre_parse_validation(fieldvalues) -- all = COMMASPACE.join(v for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -+ all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) -- result = _post_parse_validation(a.addresslist) -- -- n = 0 -- for v in fieldvalues: -- n += v.count(',') + 1 -- -- if len(result) != n: -- return [('', '')] -- -- return result -+ return a.addresslist - - - def _format_timetuple_and_zone(timetuple, zone): -@@ -254,18 +212,9 @@ def parseaddr(addr): - Return a tuple of realname and email address, unless the parse fails, in - which case return a 2-tuple of ('', ''). - """ -- if isinstance(addr, list): -- addr = addr[0] -- -- if not isinstance(addr, str): -- return ('', '') -- -- addr = _pre_parse_validation([addr])[0] -- addrs = _post_parse_validation(_AddressList(addr).addresslist) -- -- if not addrs or len(addrs) > 1: -- return ('', '') -- -+ addrs = _AddressList(addr).addresslist -+ if not addrs: -+ return '', '' - return addrs[0] - - -Index: Python-3.11.4/Lib/test/test_email/test_email.py -=================================================================== ---- Python-3.11.4.orig/Lib/test/test_email/test_email.py -+++ Python-3.11.4/Lib/test/test_email/test_email.py -@@ -3320,90 +3320,32 @@ Foo - [('Al Person', 'aperson@dom.ain'), - ('Bud Person', 'bperson@dom.ain')]) - -- def test_getaddresses_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.getaddresses(['alice@example.org(']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org)']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org<']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org>']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org@']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org,']), -- [('', 'alice@example.org'), ('', 'bob@example.com')]) -- eq(utils.getaddresses(['alice@example.org;']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org:']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org.']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org"']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org[']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org]']), -- [('', '')]) -- -- def test_parseaddr_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.parseaddr(['alice@example.org(']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org)']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org<']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org>']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org@']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org,']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org;']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org:']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org.']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org"']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org[']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org]']), -- ('', '')) -+ def test_getaddresses_comma_in_name(self): -+ """GH-106669 regression test.""" -+ self.assertEqual( -+ utils.getaddresses( -+ [ -+ '"Bud, Person" ', -+ 'aperson@dom.ain (Al Person)', -+ '"Mariusz Felisiak" ', -+ ] -+ ), -+ [ -+ ('Bud, Person', 'bperson@dom.ain'), -+ ('Al Person', 'aperson@dom.ain'), -+ ('Mariusz Felisiak', 'to@example.com'), -+ ], -+ ) - - def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) -- eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) -+ eq(utils.getaddresses( -+ ['[]*-- =~$']), -+ [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -- eq(utils.getaddresses( -- [r'Pete(A nice \) chap) ']), -- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -- eq(utils.getaddresses( -- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -- [('', '')]) -- eq(utils.getaddresses( -- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -- eq(utils.getaddresses( -- ['John Doe ']), -- [('John Doe (comment)', 'jdoe@machine.example')]) -- eq(utils.getaddresses( -- ['"Mary Smith: Personal Account" ']), -- [('Mary Smith: Personal Account', 'smith@home.example')]) -- eq(utils.getaddresses( -- ['Undisclosed recipients:;']), -- [('', '')]) -- eq(utils.getaddresses( -- [r', "Giant; \"Big\" Box" ']), -- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) - - def test_getaddresses_embedded_comment(self): - """Test proper handling of a nested comment""" -Index: Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -=================================================================== ---- Python-3.11.4.orig/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -+++ Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -1,3 +1,8 @@ -+Reverted the :mod:`email.utils` security improvement change released in -+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail -+to parse email addresses with a comma in the quoted name field. -+See :gh:`106669`. -+ - CVE-2023-27043: Prevent :func:`email.utils.parseaddr` - and :func:`email.utils.getaddresses` from returning the realname portion of an - invalid RFC2822 email header in the email address portion of the 2-tuple diff --git a/python311.changes b/python311.changes index 944ef78..fde797a 100644 --- a/python311.changes +++ b/python311.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Mon Dec 18 16:20:58 UTC 2023 - Matej Cepl + +- Refresh CVE-2023-27043-email-parsing-errors.patch to + gh#python/cpython!111116, fixing bsc#1210638 (CVE-2023-27043). +- Thus we can remove Revert-gh105127-left-tests.patch, which is + now useless. + ------------------------------------------------------------------- Wed Nov 15 09:06:16 UTC 2023 - Daniel Garcia diff --git a/python311.spec b/python311.spec index 05e3990..53b1560 100644 --- a/python311.spec +++ b/python311.spec @@ -165,9 +165,6 @@ Patch39: skip_if_buildbot-extend.patch # Detect email address parsing errors and return empty tuple to # indicate the parsing error (old API) Patch40: CVE-2023-27043-email-parsing-errors.patch -# PATCH-FIX-UPSTREAM Revert-gh105127-left-tests.patch bsc#1210638 mcepl@suse.com -# Partially revert previous patch -Patch41: Revert-gh105127-left-tests.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -428,7 +425,6 @@ other applications. %patch36 -p1 %patch39 -p1 %patch40 -p1 -%patch41 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac