python/Revert-gh105127-left-tests.patch

427 lines
17 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Thu, 20 Jul 2023 20:30:52 -0700
Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address
parsing errors ... (GH-105127)" (GH-106733)
This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00.
Adds a regression test from the issue.
See https://github.com/python/cpython/issues/106669.
(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d)
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Doc/library/email.utils.rst | 24 --
Lib/email/test/test_email.py | 113 +++++-----
Lib/email/test/test_email_renamed.py | 2
Lib/email/utils.py | 66 -----
Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5
5 files changed, 77 insertions(+), 133 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -63,11 +63,6 @@ There are several useful utilities provi
:func:`time.mktime`; otherwise ``None`` will be returned. Note that indexes 6,
7, and 8 of the result tuple are not usable.
- .. versionchanged:: 3.12
- For security reasons, addresses that were ambiguous and could parse into
- multiple different addresses now cause ``('', '')`` to be returned
- instead of only one of the *potential* addresses.
-
.. function:: parsedate_tz(date)
@@ -108,25 +103,6 @@ There are several useful utilities provi
.. versionadded:: 2.4
- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
- is returned in its place. Other errors in parsing the list of
- addresses such as a fieldvalue seemingly parsing into multiple
- addresses may result in a list containing a single empty 2-tuple
- ``[('', '')]`` being returned rather than returning potentially
- invalid output.
-
- Example malformed input parsing:
-
- .. doctest::
-
- >>> from email.utils import getaddresses
- >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
- [('', '')]
-
- .. versionchanged:: 3.12
- The 2-tuple of ``('', '')`` in the returned values when parsing
- fails were added as to address a security issue.
-
.. function:: make_msgid([idstring])
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -30,7 +30,7 @@ from email.MIMEImage import MIMEImage
from email.MIMEBase import MIMEBase
from email.MIMEMessage import MIMEMessage
from email.MIMEMultipart import MIMEMultipart
-from email import Utils
+from email import utils
from email import Errors
from email import Encoders
from email import Iterators
@@ -2236,57 +2236,57 @@ class TestMiscellaneous(TestEmailBase):
def test_formatdate(self):
now = time.time()
- self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6],
+ self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
time.gmtime(now)[:6])
def test_formatdate_localtime(self):
now = time.time()
self.assertEqual(
- Utils.parsedate(Utils.formatdate(now, localtime=True))[:6],
+ utils.parsedate(utils.formatdate(now, localtime=True))[:6],
time.localtime(now)[:6])
def test_formatdate_usegmt(self):
now = time.time()
self.assertEqual(
- Utils.formatdate(now, localtime=False),
+ utils.formatdate(now, localtime=False),
time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
self.assertEqual(
- Utils.formatdate(now, localtime=False, usegmt=True),
+ utils.formatdate(now, localtime=False, usegmt=True),
time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
def test_parsedate_none(self):
- self.assertEqual(Utils.parsedate(''), None)
+ self.assertEqual(utils.parsedate(''), None)
def test_parsedate_compact(self):
# The FWS after the comma is optional
- self.assertEqual(Utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
- Utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
+ self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
+ utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
def test_parsedate_no_dayofweek(self):
eq = self.assertEqual
- eq(Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
+ eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
(2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
def test_parsedate_compact_no_dayofweek(self):
eq = self.assertEqual
- eq(Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
+ eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
(2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
def test_parsedate_acceptable_to_time_functions(self):
eq = self.assertEqual
- timetup = Utils.parsedate('5 Feb 2003 13:47:26 -0800')
+ timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
t = int(time.mktime(timetup))
eq(time.localtime(t)[:6], timetup[:6])
eq(int(time.strftime('%Y', timetup)), 2003)
- timetup = Utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
+ timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
t = int(time.mktime(timetup[:9]))
eq(time.localtime(t)[:6], timetup[:6])
eq(int(time.strftime('%Y', timetup[:9])), 2003)
def test_mktime_tz(self):
- self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0,
+ self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
-1, -1, -1, 0)), 0)
- self.assertEqual(Utils.mktime_tz((1970, 1, 1, 0, 0, 0,
+ self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
-1, -1, -1, 1234)), -1234)
def test_parsedate_y2k(self):
@@ -2297,58 +2297,58 @@ class TestMiscellaneous(TestEmailBase):
obsoletes RFC822) requires four-digit years.
"""
- self.assertEqual(Utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
- Utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
- self.assertEqual(Utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
- Utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
+ self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
+ self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
def test_parseaddr_empty(self):
- self.assertEqual(Utils.parseaddr('<>'), ('', ''))
- self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
+ self.assertEqual(utils.parseaddr('<>'), ('', ''))
+ self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
def test_parseaddr_multiple_domains(self):
self.assertEqual(
- Utils.parseaddr('a@b@c'),
+ utils.parseaddr('a@b@c'),
('', '')
)
self.assertEqual(
- Utils.parseaddr('a@b.c@c'),
+ utils.parseaddr('a@b.c@c'),
('', '')
)
self.assertEqual(
- Utils.parseaddr('a@172.17.0.1@c'),
+ utils.parseaddr('a@172.17.0.1@c'),
('', '')
)
def test_noquote_dump(self):
self.assertEqual(
- Utils.formataddr(('A Silly Person', 'person@dom.ain')),
+ utils.formataddr(('A Silly Person', 'person@dom.ain')),
'A Silly Person <person@dom.ain>')
def test_escape_dump(self):
self.assertEqual(
- Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
+ utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
r'"A \(Very\) Silly Person" <person@dom.ain>')
a = r'A \(Special\) Person'
b = 'person@dom.ain'
- self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
def test_escape_backslashes(self):
self.assertEqual(
- Utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
+ utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
a = r'Arthur \Backslash\ Foobar'
b = 'person@dom.ain'
- self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
def test_name_with_dot(self):
x = 'John X. Doe <jxd@example.com>'
y = '"John X. Doe" <jxd@example.com>'
a, b = ('John X. Doe', 'jxd@example.com')
- self.assertEqual(Utils.parseaddr(x), (a, b))
- self.assertEqual(Utils.parseaddr(y), (a, b))
+ self.assertEqual(utils.parseaddr(x), (a, b))
+ self.assertEqual(utils.parseaddr(y), (a, b))
# formataddr() quotes the name if there's a dot in it
- self.assertEqual(Utils.formataddr((a, b)), y)
+ self.assertEqual(utils.formataddr((a, b)), y)
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
# issue 10005. Note that in the third test the second pair of
@@ -2361,31 +2361,31 @@ class TestMiscellaneous(TestEmailBase):
# not appear in an address outside of a quoted string. It is probably
# a sensible Postel interpretation, though.
eq = self.assertEqual
- eq(Utils.parseaddr('""example" example"@example.com'),
+ eq(utils.parseaddr('""example" example"@example.com'),
('', '""example" example"@example.com'))
- eq(Utils.parseaddr('"\\"example\\" example"@example.com'),
+ eq(utils.parseaddr('"\\"example\\" example"@example.com'),
('', '"\\"example\\" example"@example.com'))
- eq(Utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
+ eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
('', '"\\\\"example\\\\" example"@example.com'))
def test_multiline_from_comment(self):
x = """\
Foo
\tBar <foo@example.com>"""
- self.assertEqual(Utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
+ self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
def test_quote_dump(self):
self.assertEqual(
- Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
+ utils.formataddr(('A Silly; Person', 'person@dom.ain')),
r'"A Silly; Person" <person@dom.ain>')
def test_fix_eols(self):
eq = self.assertEqual
- eq(Utils.fix_eols('hello'), 'hello')
- eq(Utils.fix_eols('hello\n'), 'hello\r\n')
- eq(Utils.fix_eols('hello\r'), 'hello\r\n')
- eq(Utils.fix_eols('hello\r\n'), 'hello\r\n')
- eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
+ eq(utils.fix_eols('hello'), 'hello')
+ eq(utils.fix_eols('hello\n'), 'hello\r\n')
+ eq(utils.fix_eols('hello\r'), 'hello\r\n')
+ eq(utils.fix_eols('hello\r\n'), 'hello\r\n')
+ eq(utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
def test_charset_richcomparisons(self):
eq = self.assertEqual
@@ -2409,25 +2409,42 @@ Foo
def test_getaddresses(self):
eq = self.assertEqual
- eq(Utils.getaddresses(['aperson@dom.ain (Al Person)',
+ eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
'Bud Person <bperson@dom.ain>']),
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_getaddresses_comma_in_name(self):
+ """GH-106669 regression test."""
+ self.assertEqual(
+ utils.getaddresses(
+ [
+ '"Bud, Person" <bperson@dom.ain>',
+ 'aperson@dom.ain (Al Person)',
+ '"Mariusz Felisiak" <to@example.com>',
+ ]
+ ),
+ [
+ ('Bud, Person', 'bperson@dom.ain'),
+ ('Al Person', 'aperson@dom.ain'),
+ ('Mariusz Felisiak', 'to@example.com'),
+ ],
+ )
+
def test_getaddresses_nasty(self):
eq = self.assertEqual
- eq(Utils.getaddresses(['foo: ;']), [('', '')])
- eq(Utils.getaddresses(
+ eq(utils.getaddresses(['foo: ;']), [('', '')])
+ eq(utils.getaddresses(
['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
- eq(Utils.getaddresses(
+ [('', ''),])
+ eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
eq = self.assertEqual
- addrs = Utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
+ addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
eq(addrs[0][1], 'foo@bar.com')
def test_make_msgid_collisions(self):
@@ -2437,7 +2454,7 @@ Foo
# generate msgids for 3 seconds
self.msgids = []
append = self.msgids.append
- make_msgid = Utils.make_msgid
+ make_msgid = utils.make_msgid
clock = time.time
tfin = clock() + 3.0
while clock() < tfin:
--- a/Lib/email/test/test_email_renamed.py
+++ b/Lib/email/test/test_email_renamed.py
@@ -2280,7 +2280,7 @@ Foo
eq(utils.getaddresses(['foo: ;']), [('', '')])
eq(utils.getaddresses(
['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
+ [('', ''),])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -101,56 +101,11 @@ def formataddr(pair):
-def _pre_parse_validation(email_header_fields):
- accepted_values = []
- for v in email_header_fields:
- s = v.replace('\\(', '').replace('\\)', '')
- if s.count('(') != s.count(')'):
- v = "('', '')"
- accepted_values.append(v)
-
- return accepted_values
-
-
-
-def _post_parse_validation(parsed_email_header_tuples):
- accepted_values = []
- # The parser would have parsed a correctly formatted domain-literal
- # The existence of an [ after parsing indicates a parsing failure
- for v in parsed_email_header_tuples:
- if '[' in v[1]:
- v = ('', '')
- accepted_values.append(v)
-
- return accepted_values
-
-
-
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
-
- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
- its place.
-
- If the resulting list of parsed address is not the same as the number of
- fieldvalues in the input list a parsing error has occurred. A list
- containing a single empty 2-tuple [('', '')] is returned in its place.
- This is done to avoid invalid output.
- """
- fieldvalues = [str(v) for v in fieldvalues]
- fieldvalues = _pre_parse_validation(fieldvalues)
- all = COMMASPACE.join(v for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
- result = _post_parse_validation(a.addresslist)
-
- n = 0
- for v in fieldvalues:
- n += v.count(',') + 1
-
- if len(result) != n:
- return [('', '')]
-
- return result
+ return a.addresslist
@@ -262,18 +217,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- if isinstance(addr, list):
- addr = addr[0]
-
- if not isinstance(addr, str):
- return ('', '')
-
- addr = _pre_parse_validation([addr])[0]
- addrs = _post_parse_validation(_AddressList(addr).addresslist)
-
- if not addrs or len(addrs) > 1:
- return ('', '')
-
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
return addrs[0]
--- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -1,3 +1,8 @@
+Reverted the :mod:`email.utils` security improvement change released in
+3.12beta4 that unintentionally caused :func:`email.utils.getaddresses` to fail
+to parse email addresses with a comma in the quoted name field.
+See :gh:`106669`.
+
CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
and :func:`email.utils.getaddresses` from returning the realname portion of an
invalid RFC2822 email header in the email address portion of the 2-tuple