From 727f4c9b01ba0826e283dada3ef85f7aa32680ed33c6a52cc7e1b9cbd230db38 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Tue, 19 Dec 2023 15:22:13 +0000 Subject: [PATCH] Accepting request 1134053 from devel:languages:python:Factory revert OBS-URL: https://build.opensuse.org/request/show/1134053 OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python311?expand=0&rev=93 --- CVE-2023-27043-email-parsing-errors.patch | 548 +++++++--------------- Revert-gh105127-left-tests.patch | 283 +++++++++++ python311.changes | 8 - python311.spec | 4 + 4 files changed, 455 insertions(+), 388 deletions(-) create mode 100644 Revert-gh105127-left-tests.patch diff --git a/CVE-2023-27043-email-parsing-errors.patch b/CVE-2023-27043-email-parsing-errors.patch index 6e4cc64..7aec4a2 100644 --- a/CVE-2023-27043-email-parsing-errors.patch +++ b/CVE-2023-27043-email-parsing-errors.patch @@ -1,185 +1,74 @@ --- - Doc/library/email.utils.rst | 19 - - Lib/email/utils.py | 151 +++++++- - Lib/test/test_email/test_email.py | 187 +++++++++- - Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 - 4 files changed, 344 insertions(+), 21 deletions(-) + Doc/library/email.utils.rst | 26 +++ + Lib/email/utils.py | 63 +++++++ + Lib/test/test_email/test_email.py | 81 +++++++++- + Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 + 4 files changed, 164 insertions(+), 10 deletions(-) ---- a/Doc/library/email.utils.rst -+++ b/Doc/library/email.utils.rst -@@ -60,13 +60,18 @@ of the new API. - begins with angle brackets, they are stripped off. - - --.. function:: parseaddr(address) -+.. function:: parseaddr(address, *, strict=True) - - Parse address -- which should be the value of some address-containing field such - as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and +Index: Python-3.11.4/Doc/library/email.utils.rst +=================================================================== +--- Python-3.11.4.orig/Doc/library/email.utils.rst ++++ Python-3.11.4/Doc/library/email.utils.rst +@@ -67,6 +67,11 @@ of the new API. *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. -+ If *strict* is true, use a strict parser which rejects malformed inputs. -+ -+ .. versionchanged:: 3.13 -+ Add *strict* optional parameter and reject malformed inputs by default. ++ .. versionchanged:: 3.12 ++ For security reasons, addresses that were ambiguous and could parse into ++ multiple different addresses now cause ``('', '')`` to be returned ++ instead of only one of the *potential* addresses. + .. function:: formataddr(pair, charset='utf-8') -@@ -84,12 +89,15 @@ of the new API. - Added the *charset* option. - - --.. function:: getaddresses(fieldvalues) -+.. function:: getaddresses(fieldvalues, *, strict=True) - +@@ -89,7 +94,7 @@ of the new API. This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by -- :meth:`Message.get_all `. Here's a simple + :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: -+ :meth:`Message.get_all `. -+ -+ If *strict* is true, use a strict parser which rejects malformed inputs. -+ -+ Here's a simple example that gets all the recipients of a message:: ++ example that gets all the recipients of a message: from email.utils import getaddresses -@@ -99,6 +107,9 @@ of the new API. +@@ -99,6 +104,25 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) -+ .. versionchanged:: 3.13 -+ Add *strict* optional parameter and reject malformed inputs by default. ++ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` ++ is returned in its place. Other errors in parsing the list of ++ addresses such as a fieldvalue seemingly parsing into multiple ++ addresses may result in a list containing a single empty 2-tuple ++ ``[('', '')]`` being returned rather than returning potentially ++ invalid output. ++ ++ Example malformed input parsing: ++ ++ .. doctest:: ++ ++ >>> from email.utils import getaddresses ++ >>> getaddresses(['alice@example.com ', 'me@example.com']) ++ [('', '')] ++ ++ .. versionchanged:: 3.12 ++ The 2-tuple of ``('', '')`` in the returned values when parsing ++ fails were added as to address a security issue. + .. function:: parsedate(date) ---- a/Lib/email/utils.py -+++ b/Lib/email/utils.py -@@ -48,6 +48,7 @@ TICK = "'" - specialsre = re.compile(r'[][\\()<>@,:;".]') - escapesre = re.compile(r'[\\"]') - -+ - def _has_surrogates(s): - """Return True if s contains surrogate-escaped binary data.""" - # This check is based on the fact that unless there are surrogates, utf8 -@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): +Index: Python-3.11.4/Lib/email/utils.py +=================================================================== +--- Python-3.11.4.orig/Lib/email/utils.py ++++ Python-3.11.4/Lib/email/utils.py +@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'): return address -+def _iter_escaped_chars(addr): -+ pos = 0 -+ escape = False -+ for pos, ch in enumerate(addr): -+ if escape: -+ yield (pos, '\\' + ch) -+ escape = False -+ elif ch == '\\': -+ escape = True -+ else: -+ yield (pos, ch) -+ if escape: -+ yield (pos, '\\') -+ -+ -+def _strip_quoted_realnames(addr): -+ """Strip real names between quotes.""" -+ if '"' not in addr: -+ # Fast path -+ return addr -+ -+ start = 0 -+ open_pos = None -+ result = [] -+ for pos, ch in _iter_escaped_chars(addr): -+ if ch == '"': -+ if open_pos is None: -+ open_pos = pos -+ else: -+ if start != open_pos: -+ result.append(addr[start:open_pos]) -+ start = pos + 1 -+ open_pos = None - --def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -- all = COMMASPACE.join(str(v) for v in fieldvalues) -- a = _AddressList(all) -- return a.addresslist -+ if start < len(addr): -+ result.append(addr[start:]) -+ -+ return ''.join(result) -+ -+ -+supports_strict_parsing = True -+ -+def getaddresses(fieldvalues, *, strict=True): -+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -+ -+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -+ its place. -+ -+ If strict is true, use a strict parser which rejects malformed inputs. -+ """ -+ -+ # If strict is true, if the resulting list of parsed addresses is greater -+ # than the number of fieldvalues in the input list, a parsing error has -+ # occurred and consequently a list containing a single empty 2-tuple [('', -+ # '')] is returned in its place. This is done to avoid invalid output. -+ # -+ # Malformed input: getaddresses(['alice@example.com ']) -+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] -+ # Safe output: [('', '')] -+ -+ if not strict: -+ all = COMMASPACE.join(str(v) for v in fieldvalues) -+ a = _AddressList(all) -+ return a.addresslist -+ -+ fieldvalues = [str(v) for v in fieldvalues] -+ fieldvalues = _pre_parse_validation(fieldvalues) -+ addr = COMMASPACE.join(fieldvalues) -+ a = _AddressList(addr) -+ result = _post_parse_validation(a.addresslist) -+ -+ # Treat output as invalid if the number of addresses is not equal to the -+ # expected number of addresses. -+ n = 0 -+ for v in fieldvalues: -+ # When a comma is used in the Real Name part it is not a deliminator. -+ # So strip those out before counting the commas. -+ v = _strip_quoted_realnames(v) -+ # Expected number of addresses: 1 + number of commas -+ n += 1 + v.count(',') -+ if len(result) != n: -+ return [('', '')] -+ -+ return result -+ -+ -+def _check_parenthesis(addr): -+ # Ignore parenthesis in quoted real names. -+ addr = _strip_quoted_realnames(addr) -+ -+ opens = 0 -+ for pos, ch in _iter_escaped_chars(addr): -+ if ch == '(': -+ opens += 1 -+ elif ch == ')': -+ opens -= 1 -+ if opens < 0: -+ return False -+ return (opens == 0) -+ -+ +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: -+ if not _check_parenthesis(v): ++ s = v.replace('\\(', '').replace('\\)', '') ++ if s.count('(') != s.count(')'): + v = "('', '')" + accepted_values.append(v) + @@ -196,32 +85,46 @@ + accepted_values.append(v) + + return accepted_values ++ + + def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If the resulting list of parsed address is not the same as the number of ++ fieldvalues in the input list a parsing error has occurred. A list ++ containing a single empty 2-tuple [('', '')] is returned in its place. ++ This is done to avoid invalid output. ++ """ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ all = COMMASPACE.join(v for v in fieldvalues) + a = _AddressList(all) +- return a.addresslist ++ result = _post_parse_validation(a.addresslist) ++ ++ n = 0 ++ for v in fieldvalues: ++ n += v.count(',') + 1 ++ ++ if len(result) != n: ++ return [('', '')] ++ ++ return result def _format_timetuple_and_zone(timetuple, zone): -@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): - tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) - - --def parseaddr(addr): -+def parseaddr(addr, *, strict=True): - """ - Parse addr into its constituent realname and email address parts. - +@@ -212,9 +254,18 @@ def parseaddr(addr): Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). -+ -+ If strict is True, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' -+ if not strict: -+ addrs = _AddressList(addr).addresslist -+ if not addrs: -+ return ('', '') -+ return addrs[0] -+ + if isinstance(addr, list): + addr = addr[0] + @@ -237,225 +140,110 @@ return addrs[0] ---- a/Lib/test/test_email/test_email.py -+++ b/Lib/test/test_email/test_email.py -@@ -17,6 +17,7 @@ from unittest.mock import patch - - import email - import email.policy -+import email.utils - - from email.charset import Charset - from email.generator import Generator, DecodedGenerator, BytesGenerator -@@ -3321,15 +3322,137 @@ Foo +Index: Python-3.11.4/Lib/test/test_email/test_email.py +=================================================================== +--- Python-3.11.4.orig/Lib/test/test_email/test_email.py ++++ Python-3.11.4/Lib/test/test_email/test_email.py +@@ -3320,15 +3320,90 @@ Foo [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) -+ def test_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" -+ alice = 'alice@example.org' -+ bob = 'bob@example.com' -+ empty = ('', '') ++ def test_getaddresses_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043""" ++ eq = self.assertEqual ++ eq(utils.getaddresses(['alice@example.org(']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org)']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org<']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org>']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org@']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org,']), ++ [('', 'alice@example.org'), ('', 'bob@example.com')]) ++ eq(utils.getaddresses(['alice@example.org;']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org:']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org.']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org"']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org[']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org]']), ++ [('', '')]) + -+ # Test utils.getaddresses() and utils.parseaddr() on malformed email -+ # addresses: default behavior (strict=True) rejects malformed address, -+ # and strict=False which tolerates malformed address. -+ for invalid_separator, expected_non_strict in ( -+ ('(', [(f'<{bob}>', alice)]), -+ (')', [('', alice), empty, ('', bob)]), -+ ('<', [('', alice), empty, ('', bob), empty]), -+ ('>', [('', alice), empty, ('', bob)]), -+ ('[', [('', f'{alice}[<{bob}>]')]), -+ (']', [('', alice), empty, ('', bob)]), -+ ('@', [empty, empty, ('', bob)]), -+ (';', [('', alice), empty, ('', bob)]), -+ (':', [('', alice), ('', bob)]), -+ ('.', [('', alice + '.'), ('', bob)]), -+ ('"', [('', alice), ('', f'<{bob}>')]), -+ ): -+ address = f'{alice}{invalid_separator}<{bob}>' -+ with self.subTest(address=address): -+ self.assertEqual(utils.getaddresses([address]), -+ [empty]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ expected_non_strict) -+ -+ self.assertEqual(utils.parseaddr([address]), -+ empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Comma (',') is treated differently depending on strict parameter. -+ # Comma without quotes. -+ address = f'{alice},<{bob}>' -+ self.assertEqual(utils.getaddresses([address]), -+ [('', alice), ('', bob)]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('', alice), ('', bob)]) -+ self.assertEqual(utils.parseaddr([address]), -+ empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Real name between quotes containing comma. -+ address = '"Alice, alice@example.org" ' -+ expected_strict = ('Alice, alice@example.org', 'bob@example.com') -+ self.assertEqual(utils.getaddresses([address]), [expected_strict]) -+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) -+ self.assertEqual(utils.parseaddr([address]), expected_strict) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Valid parenthesis in comments. -+ address = 'alice@example.org (Alice)' -+ expected_strict = ('Alice', 'alice@example.org') -+ self.assertEqual(utils.getaddresses([address]), [expected_strict]) -+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) -+ self.assertEqual(utils.parseaddr([address]), expected_strict) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Invalid parenthesis in comments. -+ address = 'alice@example.org )Alice(' -+ self.assertEqual(utils.getaddresses([address]), [empty]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) -+ self.assertEqual(utils.parseaddr([address]), empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Two addresses with quotes separated by comma. -+ address = '"Jane Doe" , "John Doe" ' -+ self.assertEqual(utils.getaddresses([address]), -+ [('Jane Doe', 'jane@example.net'), -+ ('John Doe', 'john@example.net')]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('Jane Doe', 'jane@example.net'), -+ ('John Doe', 'john@example.net')]) -+ self.assertEqual(utils.parseaddr([address]), empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Test email.utils.supports_strict_parsing attribute -+ self.assertEqual(email.utils.supports_strict_parsing, True) ++ def test_parseaddr_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043""" ++ eq = self.assertEqual ++ eq(utils.parseaddr(['alice@example.org(']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org)']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org<']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org>']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org@']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org,']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org;']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org:']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org.']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org"']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org[']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org]']), ++ ('', '')) + def test_getaddresses_nasty(self): -- eq = self.assertEqual -- eq(utils.getaddresses(['foo: ;']), [('', '')]) + eq = self.assertEqual + eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) -- eq(utils.getaddresses( -- ['foo: ;', '"Jason R. Mastaler" ']), -- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -+ for addresses, expected in ( -+ (['"Sürname, Firstname" '], -+ [('Sürname, Firstname', 'to@example.com')]), -+ -+ (['foo: ;'], -+ [('', '')]), -+ -+ (['foo: ;', '"Jason R. Mastaler" '], -+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), -+ -+ ([r'Pete(A nice \) chap) '], -+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), -+ -+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], -+ [('', '')]), -+ -+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], -+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), -+ -+ (['John Doe '], -+ [('John Doe (comment)', 'jdoe@machine.example')]), -+ -+ (['"Mary Smith: Personal Account" '], -+ [('Mary Smith: Personal Account', 'smith@home.example')]), -+ -+ (['Undisclosed recipients:;'], -+ [('', '')]), -+ -+ ([r', "Giant; \"Big\" Box" '], -+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), -+ ): -+ with self.subTest(addresses=addresses): -+ self.assertEqual(utils.getaddresses(addresses), -+ expected) -+ self.assertEqual(utils.getaddresses(addresses, strict=False), -+ expected) -+ -+ addresses = ['[]*-- =~$'] -+ self.assertEqual(utils.getaddresses(addresses), -+ [('', '')]) -+ self.assertEqual(utils.getaddresses(addresses, strict=False), -+ [('', ''), ('', ''), ('', '*--')]) ++ eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) + eq(utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ eq(utils.getaddresses( ++ [r'Pete(A nice \) chap) ']), ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) ++ eq(utils.getaddresses( ++ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), ++ [('', '')]) ++ eq(utils.getaddresses( ++ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) ++ eq(utils.getaddresses( ++ ['John Doe ']), ++ [('John Doe (comment)', 'jdoe@machine.example')]) ++ eq(utils.getaddresses( ++ ['"Mary Smith: Personal Account" ']), ++ [('Mary Smith: Personal Account', 'smith@home.example')]) ++ eq(utils.getaddresses( ++ ['Undisclosed recipients:;']), ++ [('', '')]) ++ eq(utils.getaddresses( ++ [r', "Giant; \"Big\" Box" ']), ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" -@@ -3520,6 +3643,54 @@ multipart/report - m = cls(*constructor, policy=email.policy.default) - self.assertIs(m.policy, email.policy.default) - -+ def test_iter_escaped_chars(self): -+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), -+ [(0, 'a'), -+ (2, '\\\\'), -+ (3, 'b'), -+ (5, '\\"'), -+ (6, 'c'), -+ (8, '\\\\'), -+ (9, '"'), -+ (10, 'd')]) -+ self.assertEqual(list(utils._iter_escaped_chars('a\\')), -+ [(0, 'a'), (1, '\\')]) -+ -+ def test_strip_quoted_realnames(self): -+ def check(addr, expected): -+ self.assertEqual(utils._strip_quoted_realnames(addr), expected) -+ -+ check('"Jane Doe" , "John Doe" ', -+ ' , ') -+ check(r'"Jane \"Doe\"." ', -+ ' ') -+ -+ # special cases -+ check(r'before"name"after', 'beforeafter') -+ check(r'before"name"', 'before') -+ check(r'b"name"', 'b') # single char -+ check(r'"name"after', 'after') -+ check(r'"name"a', 'a') # single char -+ check(r'"name"', '') -+ -+ # no change -+ for addr in ( -+ 'Jane Doe , John Doe ', -+ 'lone " quote', -+ ): -+ self.assertEqual(utils._strip_quoted_realnames(addr), addr) -+ -+ -+ def test_check_parenthesis(self): -+ addr = 'alice@example.net' -+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) -+ -+ # Ignore real name between quotes -+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) -+ - - # Test the iterator/generators - class TestIterators(TestEmailBase): +Index: Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +=================================================================== --- /dev/null -+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst -@@ -0,0 +1,8 @@ -+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now -+return ``('', '')`` 2-tuples in more situations where invalid email -+addresses are encountered instead of potentially inaccurate values. Add -+optional *strict* parameter to these two functions: use ``strict=False`` to -+get the old behavior, accept malformed inputs. -+``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check -+if the *strict* paramater is available. Patch by Thomas Dwyer and Victor -+Stinner to improve the CVE-2023-27043 fix. ++++ Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +@@ -0,0 +1,4 @@ ++CVE-2023-27043: Prevent :func:`email.utils.parseaddr` ++and :func:`email.utils.getaddresses` from returning the realname portion of an ++invalid RFC2822 email header in the email address portion of the 2-tuple ++returned after being parsed by :class:`email._parseaddr.AddressList`. diff --git a/Revert-gh105127-left-tests.patch b/Revert-gh105127-left-tests.patch new file mode 100644 index 0000000..87ce40a --- /dev/null +++ b/Revert-gh105127-left-tests.patch @@ -0,0 +1,283 @@ +From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001 +From: "Gregory P. Smith" +Date: Thu, 20 Jul 2023 20:30:52 -0700 +Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address + parsing errors ... (GH-105127)" (GH-106733) + +This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00. +Adds a regression test from the issue. + +See https://github.com/python/cpython/issues/106669.. +(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d) + +Co-authored-by: Gregory P. Smith +--- + Doc/library/email.utils.rst | 26 -- + Lib/email/utils.py | 63 ------ + Lib/test/test_email/test_email.py | 96 +--------- + Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5 + 4 files changed, 31 insertions(+), 159 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst + +Index: Python-3.11.4/Doc/library/email.utils.rst +=================================================================== +--- Python-3.11.4.orig/Doc/library/email.utils.rst ++++ Python-3.11.4/Doc/library/email.utils.rst +@@ -67,11 +67,6 @@ of the new API. + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + +- .. versionchanged:: 3.12 +- For security reasons, addresses that were ambiguous and could parse into +- multiple different addresses now cause ``('', '')`` to be returned +- instead of only one of the *potential* addresses. +- + + .. function:: formataddr(pair, charset='utf-8') + +@@ -94,7 +89,7 @@ of the new API. + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. + *fieldvalues* is a sequence of header field values as might be returned by + :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message: ++ example that gets all the recipients of a message:: + + from email.utils import getaddresses + +@@ -104,25 +99,6 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + +- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` +- is returned in its place. Other errors in parsing the list of +- addresses such as a fieldvalue seemingly parsing into multiple +- addresses may result in a list containing a single empty 2-tuple +- ``[('', '')]`` being returned rather than returning potentially +- invalid output. +- +- Example malformed input parsing: +- +- .. doctest:: +- +- >>> from email.utils import getaddresses +- >>> getaddresses(['alice@example.com ', 'me@example.com']) +- [('', '')] +- +- .. versionchanged:: 3.12 +- The 2-tuple of ``('', '')`` in the returned values when parsing +- fails were added as to address a security issue. +- + + .. function:: parsedate(date) + +Index: Python-3.11.4/Lib/email/utils.py +=================================================================== +--- Python-3.11.4.orig/Lib/email/utils.py ++++ Python-3.11.4/Lib/email/utils.py +@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'): + return address + + +-def _pre_parse_validation(email_header_fields): +- accepted_values = [] +- for v in email_header_fields: +- s = v.replace('\\(', '').replace('\\)', '') +- if s.count('(') != s.count(')'): +- v = "('', '')" +- accepted_values.append(v) +- +- return accepted_values +- +- +-def _post_parse_validation(parsed_email_header_tuples): +- accepted_values = [] +- # The parser would have parsed a correctly formatted domain-literal +- # The existence of an [ after parsing indicates a parsing failure +- for v in parsed_email_header_tuples: +- if '[' in v[1]: +- v = ('', '') +- accepted_values.append(v) +- +- return accepted_values +- + + def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. +- +- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in +- its place. +- +- If the resulting list of parsed address is not the same as the number of +- fieldvalues in the input list a parsing error has occurred. A list +- containing a single empty 2-tuple [('', '')] is returned in its place. +- This is done to avoid invalid output. +- """ +- fieldvalues = [str(v) for v in fieldvalues] +- fieldvalues = _pre_parse_validation(fieldvalues) +- all = COMMASPACE.join(v for v in fieldvalues) ++ """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" ++ all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) +- result = _post_parse_validation(a.addresslist) +- +- n = 0 +- for v in fieldvalues: +- n += v.count(',') + 1 +- +- if len(result) != n: +- return [('', '')] +- +- return result ++ return a.addresslist + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -254,18 +212,9 @@ def parseaddr(addr): + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + """ +- if isinstance(addr, list): +- addr = addr[0] +- +- if not isinstance(addr, str): +- return ('', '') +- +- addr = _pre_parse_validation([addr])[0] +- addrs = _post_parse_validation(_AddressList(addr).addresslist) +- +- if not addrs or len(addrs) > 1: +- return ('', '') +- ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return '', '' + return addrs[0] + + +Index: Python-3.11.4/Lib/test/test_email/test_email.py +=================================================================== +--- Python-3.11.4.orig/Lib/test/test_email/test_email.py ++++ Python-3.11.4/Lib/test/test_email/test_email.py +@@ -3320,90 +3320,32 @@ Foo + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + +- def test_getaddresses_parsing_errors(self): +- """Test for parsing errors from CVE-2023-27043""" +- eq = self.assertEqual +- eq(utils.getaddresses(['alice@example.org(']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org)']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org<']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org>']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org@']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org,']), +- [('', 'alice@example.org'), ('', 'bob@example.com')]) +- eq(utils.getaddresses(['alice@example.org;']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org:']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org.']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org"']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org[']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org]']), +- [('', '')]) +- +- def test_parseaddr_parsing_errors(self): +- """Test for parsing errors from CVE-2023-27043""" +- eq = self.assertEqual +- eq(utils.parseaddr(['alice@example.org(']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org)']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org<']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org>']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org@']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org,']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org;']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org:']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org.']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org"']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org[']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org]']), +- ('', '')) ++ def test_getaddresses_comma_in_name(self): ++ """GH-106669 regression test.""" ++ self.assertEqual( ++ utils.getaddresses( ++ [ ++ '"Bud, Person" ', ++ 'aperson@dom.ain (Al Person)', ++ '"Mariusz Felisiak" ', ++ ] ++ ), ++ [ ++ ('Bud, Person', 'bperson@dom.ain'), ++ ('Al Person', 'aperson@dom.ain'), ++ ('Mariusz Felisiak', 'to@example.com'), ++ ], ++ ) + + def test_getaddresses_nasty(self): + eq = self.assertEqual + eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) ++ eq(utils.getaddresses( ++ ['[]*-- =~$']), ++ [('', ''), ('', ''), ('', '*--')]) + eq(utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) +- eq(utils.getaddresses( +- [r'Pete(A nice \) chap) ']), +- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) +- eq(utils.getaddresses( +- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), +- [('', '')]) +- eq(utils.getaddresses( +- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), +- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) +- eq(utils.getaddresses( +- ['John Doe ']), +- [('John Doe (comment)', 'jdoe@machine.example')]) +- eq(utils.getaddresses( +- ['"Mary Smith: Personal Account" ']), +- [('Mary Smith: Personal Account', 'smith@home.example')]) +- eq(utils.getaddresses( +- ['Undisclosed recipients:;']), +- [('', '')]) +- eq(utils.getaddresses( +- [r', "Giant; \"Big\" Box" ']), +- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +Index: Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +=================================================================== +--- Python-3.11.4.orig/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst ++++ Python-3.11.4/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +@@ -1,3 +1,8 @@ ++Reverted the :mod:`email.utils` security improvement change released in ++3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail ++to parse email addresses with a comma in the quoted name field. ++See :gh:`106669`. ++ + CVE-2023-27043: Prevent :func:`email.utils.parseaddr` + and :func:`email.utils.getaddresses` from returning the realname portion of an + invalid RFC2822 email header in the email address portion of the 2-tuple diff --git a/python311.changes b/python311.changes index 324f3d9..af0f2b6 100644 --- a/python311.changes +++ b/python311.changes @@ -1,11 +1,3 @@ -------------------------------------------------------------------- -Mon Dec 18 16:20:58 UTC 2023 - Matej Cepl - -- Refresh CVE-2023-27043-email-parsing-errors.patch to - gh#python/cpython!111116, fixing bsc#1210638 (CVE-2023-27043). -- Thus we can remove Revert-gh105127-left-tests.patch, which is - now useless. - ------------------------------------------------------------------- Fri Dec 15 10:04:33 UTC 2023 - Daniel Garcia diff --git a/python311.spec b/python311.spec index 16f2357..0e3c740 100644 --- a/python311.spec +++ b/python311.spec @@ -165,6 +165,9 @@ Patch39: skip_if_buildbot-extend.patch # Detect email address parsing errors and return empty tuple to # indicate the parsing error (old API) Patch40: CVE-2023-27043-email-parsing-errors.patch +# PATCH-FIX-UPSTREAM Revert-gh105127-left-tests.patch bsc#1210638 mcepl@suse.com +# Partially revert previous patch +Patch41: Revert-gh105127-left-tests.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -425,6 +428,7 @@ other applications. %patch36 -p1 %patch39 -p1 %patch40 -p1 +%patch41 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac