From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 20 Jul 2023 20:30:52 -0700 Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (GH-105127)" (GH-106733) This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00. Adds a regression test from the issue. See https://github.com/python/cpython/issues/106669.. (cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d) Co-authored-by: Gregory P. Smith --- Doc/library/email.utils.rst | 24 -- Lib/email/test/test_email.py | 113 +++++----- Lib/email/test/test_email_renamed.py | 2 Lib/email/utils.py | 66 ----- Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5 5 files changed, 77 insertions(+), 133 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -63,11 +63,6 @@ There are several useful utilities provi :func:`time.mktime`; otherwise ``None`` will be returned. Note that indexes 6, 7, and 8 of the result tuple are not usable. - .. versionchanged:: 3.12 - For security reasons, addresses that were ambiguous and could parse into - multiple different addresses now cause ``('', '')`` to be returned - instead of only one of the *potential* addresses. - .. function:: parsedate_tz(date) @@ -108,25 +103,6 @@ There are several useful utilities provi .. versionadded:: 2.4 - When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` - is returned in its place. Other errors in parsing the list of - addresses such as a fieldvalue seemingly parsing into multiple - addresses may result in a list containing a single empty 2-tuple - ``[('', '')]`` being returned rather than returning potentially - invalid output. - - Example malformed input parsing: - - .. 
doctest:: - - >>> from email.utils import getaddresses - >>> getaddresses(['alice@example.com ', 'me@example.com']) - [('', '')] - - .. versionchanged:: 3.12 - The 2-tuple of ``('', '')`` in the returned values when parsing - fails were added as to address a security issue. - .. function:: make_msgid([idstring]) --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -30,7 +30,7 @@ from email.MIMEImage import MIMEImage from email.MIMEBase import MIMEBase from email.MIMEMessage import MIMEMessage from email.MIMEMultipart import MIMEMultipart -from email import Utils +from email import utils from email import Errors from email import Encoders from email import Iterators @@ -2236,57 +2236,57 @@ class TestMiscellaneous(TestEmailBase): def test_formatdate(self): now = time.time() - self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6], + self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], time.gmtime(now)[:6]) def test_formatdate_localtime(self): now = time.time() self.assertEqual( - Utils.parsedate(Utils.formatdate(now, localtime=True))[:6], + utils.parsedate(utils.formatdate(now, localtime=True))[:6], time.localtime(now)[:6]) def test_formatdate_usegmt(self): now = time.time() self.assertEqual( - Utils.formatdate(now, localtime=False), + utils.formatdate(now, localtime=False), time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) self.assertEqual( - Utils.formatdate(now, localtime=False, usegmt=True), + utils.formatdate(now, localtime=False, usegmt=True), time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) def test_parsedate_none(self): - self.assertEqual(Utils.parsedate(''), None) + self.assertEqual(utils.parsedate(''), None) def test_parsedate_compact(self): # The FWS after the comma is optional - self.assertEqual(Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), - Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')) + self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'), + utils.parsedate('Wed, 3 Apr 
2002 14:58:26 +0800')) def test_parsedate_no_dayofweek(self): eq = self.assertEqual - eq(Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), + eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'), (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)) def test_parsedate_compact_no_dayofweek(self): eq = self.assertEqual - eq(Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), + eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) def test_parsedate_acceptable_to_time_functions(self): eq = self.assertEqual - timetup = Utils.parsedate('5 Feb 2003 13:47:26 -0800') + timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') t = int(time.mktime(timetup)) eq(time.localtime(t)[:6], timetup[:6]) eq(int(time.strftime('%Y', timetup)), 2003) - timetup = Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') + timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') t = int(time.mktime(timetup[:9])) eq(time.localtime(t)[:6], timetup[:6]) eq(int(time.strftime('%Y', timetup[:9])), 2003) def test_mktime_tz(self): - self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0, + self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, -1, -1, -1, 0)), 0) - self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0, + self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, -1, -1, -1, 1234)), -1234) def test_parsedate_y2k(self): @@ -2297,58 +2297,58 @@ class TestMiscellaneous(TestEmailBase): obsoletes RFC822) requires four-digit years. 
""" - self.assertEqual(Utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), - Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) - self.assertEqual(Utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), - Utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) + self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), + utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) + self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), + utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) def test_parseaddr_empty(self): - self.assertEqual(Utils.parseaddr('<>'), ('', '')) - self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '') + self.assertEqual(utils.parseaddr('<>'), ('', '')) + self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') def test_parseaddr_multiple_domains(self): self.assertEqual( - Utils.parseaddr('a@b@c'), + utils.parseaddr('a@b@c'), ('', '') ) self.assertEqual( - Utils.parseaddr('a@b.c@c'), + utils.parseaddr('a@b.c@c'), ('', '') ) self.assertEqual( - Utils.parseaddr('a@172.17.0.1@c'), + utils.parseaddr('a@172.17.0.1@c'), ('', '') ) def test_noquote_dump(self): self.assertEqual( - Utils.formataddr(('A Silly Person', 'person@dom.ain')), + utils.formataddr(('A Silly Person', 'person@dom.ain')), 'A Silly Person ') def test_escape_dump(self): self.assertEqual( - Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), + utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')), r'"A \(Very\) Silly Person" ') a = r'A \(Special\) Person' b = 'person@dom.ain' - self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) def test_escape_backslashes(self): self.assertEqual( - Utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')), + utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')), r'"Arthur \\Backslash\\ Foobar" ') a = r'Arthur \Backslash\ Foobar' b = 'person@dom.ain' - self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b)) + 
self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) def test_name_with_dot(self): x = 'John X. Doe ' y = '"John X. Doe" ' a, b = ('John X. Doe', 'jxd@example.com') - self.assertEqual(Utils.parseaddr(x), (a, b)) - self.assertEqual(Utils.parseaddr(y), (a, b)) + self.assertEqual(utils.parseaddr(x), (a, b)) + self.assertEqual(utils.parseaddr(y), (a, b)) # formataddr() quotes the name if there's a dot in it - self.assertEqual(Utils.formataddr((a, b)), y) + self.assertEqual(utils.formataddr((a, b)), y) def test_parseaddr_preserves_quoted_pairs_in_addresses(self): # issue 10005. Note that in the third test the second pair of @@ -2361,31 +2361,31 @@ class TestMiscellaneous(TestEmailBase): # not appear in an address outside of a quoted string. It is probably # a sensible Postel interpretation, though. eq = self.assertEqual - eq(Utils.parseaddr('""example" example"@example.com'), + eq(utils.parseaddr('""example" example"@example.com'), ('', '""example" example"@example.com')) - eq(Utils.parseaddr('"\\"example\\" example"@example.com'), + eq(utils.parseaddr('"\\"example\\" example"@example.com'), ('', '"\\"example\\" example"@example.com')) - eq(Utils.parseaddr('"\\\\"example\\\\" example"@example.com'), + eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), ('', '"\\\\"example\\\\" example"@example.com')) def test_multiline_from_comment(self): x = """\ Foo \tBar """ - self.assertEqual(Utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) + self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com')) def test_quote_dump(self): self.assertEqual( - Utils.formataddr(('A Silly; Person', 'person@dom.ain')), + utils.formataddr(('A Silly; Person', 'person@dom.ain')), r'"A Silly; Person" ') def test_fix_eols(self): eq = self.assertEqual - eq(Utils.fix_eols('hello'), 'hello') - eq(Utils.fix_eols('hello\n'), 'hello\r\n') - eq(Utils.fix_eols('hello\r'), 'hello\r\n') - eq(Utils.fix_eols('hello\r\n'), 'hello\r\n') - eq(Utils.fix_eols('hello\n\r'), 
'hello\r\n\r\n') + eq(utils.fix_eols('hello'), 'hello') + eq(utils.fix_eols('hello\n'), 'hello\r\n') + eq(utils.fix_eols('hello\r'), 'hello\r\n') + eq(utils.fix_eols('hello\r\n'), 'hello\r\n') + eq(utils.fix_eols('hello\n\r'), 'hello\r\n\r\n') def test_charset_richcomparisons(self): eq = self.assertEqual @@ -2409,25 +2409,42 @@ Foo def test_getaddresses(self): eq = self.assertEqual - eq(Utils.getaddresses(['aperson@dom.ain (Al Person)', + eq(utils.getaddresses(['aperson@dom.ain (Al Person)', 'Bud Person ']), [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) + def test_getaddresses_comma_in_name(self): + """GH-106669 regression test.""" + self.assertEqual( + utils.getaddresses( + [ + '"Bud, Person" ', + 'aperson@dom.ain (Al Person)', + '"Mariusz Felisiak" ', + ] + ), + [ + ('Bud, Person', 'bperson@dom.ain'), + ('Al Person', 'aperson@dom.ain'), + ('Mariusz Felisiak', 'to@example.com'), + ], + ) + def test_getaddresses_nasty(self): eq = self.assertEqual - eq(Utils.getaddresses(['foo: ;']), [('', '')]) - eq(Utils.getaddresses( + eq(utils.getaddresses(['foo: ;']), [('', '')]) + eq(utils.getaddresses( ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) - eq(Utils.getaddresses( + [('', ''),]) + eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" eq = self.assertEqual - addrs = Utils.getaddresses(['User ((nested comment)) ']) + addrs = utils.getaddresses(['User ((nested comment)) ']) eq(addrs[0][1], 'foo@bar.com') def test_make_msgid_collisions(self): @@ -2437,7 +2454,7 @@ Foo # generate msgids for 3 seconds self.msgids = [] append = self.msgids.append - make_msgid = Utils.make_msgid + make_msgid = utils.make_msgid clock = time.time tfin = clock() + 3.0 while clock() < tfin: --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -2280,7 +2280,7 @@ Foo eq(utils.getaddresses(['foo: ;']), [('', '')]) eq(utils.getaddresses( ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) + [('', ''),]) eq(utils.getaddresses( ['foo: ;', '"Jason R. Mastaler" ']), [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -101,56 +101,11 @@ def formataddr(pair): -def _pre_parse_validation(email_header_fields): - accepted_values = [] - for v in email_header_fields: - s = v.replace('\\(', '').replace('\\)', '') - if s.count('(') != s.count(')'): - v = "('', '')" - accepted_values.append(v) - - return accepted_values - - - -def _post_parse_validation(parsed_email_header_tuples): - accepted_values = [] - # The parser would have parsed a correctly formatted domain-literal - # The existence of an [ after parsing indicates a parsing failure - for v in parsed_email_header_tuples: - if '[' in v[1]: - v = ('', '') - accepted_values.append(v) - - return accepted_values - - - def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. - - When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in - its place. - - If the resulting list of parsed address is not the same as the number of - fieldvalues in the input list a parsing error has occurred. 
A list - containing a single empty 2-tuple [('', '')] is returned in its place. - This is done to avoid invalid output. - """ - fieldvalues = [str(v) for v in fieldvalues] - fieldvalues = _pre_parse_validation(fieldvalues) - all = COMMASPACE.join(v for v in fieldvalues) + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(str(v) for v in fieldvalues) a = _AddressList(all) - result = _post_parse_validation(a.addresslist) - - n = 0 - for v in fieldvalues: - n += v.count(',') + 1 - - if len(result) != n: - return [('', '')] - - return result + return a.addresslist @@ -262,18 +217,9 @@ def parseaddr(addr): Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). """ - if isinstance(addr, list): - addr = addr[0] - - if not isinstance(addr, str): - return ('', '') - - addr = _pre_parse_validation([addr])[0] - addrs = _post_parse_validation(_AddressList(addr).addresslist) - - if not addrs or len(addrs) > 1: - return ('', '') - + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' return addrs[0] --- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst @@ -1,3 +1,8 @@ +Reverted the :mod:`email.utils` security improvement change released in +3.12beta4 that unintentionally caused :func:`email.utils.getaddresses` to fail +to parse email addresses with a comma in the quoted name field. +See :gh:`106669`. + CVE-2023-27043: Prevent :func:`email.utils.parseaddr` and :func:`email.utils.getaddresses` from returning the realname portion of an invalid RFC2822 email header in the email address portion of the 2-tuple