python310/CVE-2023-27043-email-parsing-errors.patch
Matej Cepl 4a7871d409 - Add Revert-gh105127-left-tests.patch (gh#python/cpython!106941)
partially reverting CVE-2023-27043-email-parsing-errors.patch,
  because of the regression in gh#python/cpython#106669.

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python310?expand=0&rev=103
2023-08-03 14:14:37 +00:00

250 lines
10 KiB
Diff

---
Doc/library/email.utils.rst | 26 +++
Lib/email/utils.py | 63 +++++++
Lib/test/test_email/test_email.py | 81 +++++++++-
Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4
4 files changed, 164 insertions(+), 10 deletions(-)
Index: Python-3.10.12/Doc/library/email.utils.rst
===================================================================
--- Python-3.10.12.orig/Doc/library/email.utils.rst
+++ Python-3.10.12/Doc/library/email.utils.rst
@@ -67,6 +67,11 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
+ .. versionchanged:: 3.12
+ For security reasons, addresses that were ambiguous and could parse into
+ multiple different addresses now cause ``('', '')`` to be returned
+ instead of only one of the *potential* addresses.
+
.. function:: formataddr(pair, charset='utf-8')
@@ -89,7 +94,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message::
+ example that gets all the recipients of a message:
from email.utils import getaddresses
@@ -99,6 +104,25 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
+ is returned in its place. Other errors in parsing the list of
+ addresses, such as a fieldvalue seemingly parsing into multiple
+ addresses, may result in a list containing a single empty 2-tuple
+ ``[('', '')]`` being returned rather than returning potentially
+ invalid output.
+
+ Example malformed input parsing:
+
+ .. doctest::
+
+ >>> from email.utils import getaddresses
+ >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
+ [('', '')]
+
+ .. versionchanged:: 3.12
+ The 2-tuples of ``('', '')`` in the returned values when parsing
+ fails were added to address a security issue.
+
.. function:: parsedate(date)
Index: Python-3.10.12/Lib/email/utils.py
===================================================================
--- Python-3.10.12.orig/Lib/email/utils.py
+++ Python-3.10.12/Lib/email/utils.py
@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
return address
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ s = v.replace('\\(', '').replace('\\)', '')
+ if s.count('(') != s.count(')'):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
+
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(str(v) for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If the number of parsed addresses does not match the number implied by the
+ commas in the input fieldvalues, a parsing error has occurred. A list
+ containing a single empty 2-tuple [('', '')] is returned in its place.
+ This is done to avoid invalid output.
+ """
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ all = COMMASPACE.join(v for v in fieldvalues)
a = _AddressList(all)
- return a.addresslist
+ result = _post_parse_validation(a.addresslist)
+
+ n = 0
+ for v in fieldvalues:
+ n += v.count(',') + 1
+
+ if len(result) != n:
+ return [('', '')]
+
+ return result
def _format_timetuple_and_zone(timetuple, zone):
@@ -212,9 +254,18 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
Index: Python-3.10.12/Lib/test/test_email/test_email.py
===================================================================
--- Python-3.10.12.orig/Lib/test/test_email/test_email.py
+++ Python-3.10.12/Lib/test/test_email/test_email.py
@@ -3288,15 +3288,90 @@ Foo
[('Al Person', 'aperson@dom.ain'),
('Bud Person', 'bperson@dom.ain')])
+ def test_getaddresses_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
+ [('', 'alice@example.org'), ('', 'bob@example.com')])
+ eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
+ [('', '')])
+ eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
+ [('', '')])
+
+ def test_parseaddr_parsing_errors(self):
+ """Test for parsing errors from CVE-2023-27043"""
+ eq = self.assertEqual
+ eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
+ ('', ''))
+ eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
+ ('', ''))
+
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(
- ['[]*-- =~$']),
- [('', ''), ('', ''), ('', '*--')])
+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
+ eq(utils.getaddresses(
+ [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
+ eq(utils.getaddresses(
+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
+ [('', '')])
+ eq(utils.getaddresses(
+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
+ eq(utils.getaddresses(
+ ['John Doe <jdoe@machine(comment). example>']),
+ [('John Doe (comment)', 'jdoe@machine.example')])
+ eq(utils.getaddresses(
+ ['"Mary Smith: Personal Account" <smith@home.example>']),
+ [('Mary Smith: Personal Account', 'smith@home.example')])
+ eq(utils.getaddresses(
+ ['Undisclosed recipients:;']),
+ [('', '')])
+ eq(utils.getaddresses(
+ [r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
Index: Python-3.10.12/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
===================================================================
--- /dev/null
+++ Python-3.10.12/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -0,0 +1,4 @@
+CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
+and :func:`email.utils.getaddresses` from returning the realname portion of an
+invalid RFC2822 email header in the email address portion of the 2-tuple
+returned after being parsed by :class:`email._parseaddr.AddressList`.