python/CVE-2023-27043-email-parsing-errors.patch

138 lines
4.4 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

---
Doc/library/email.utils.rst | 24 +++
Lib/email/utils.py | 66 +++++++++-
Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4
3 files changed, 88 insertions(+), 6 deletions(-)
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -63,6 +63,11 @@ There are several useful utilities provi
:func:`time.mktime`; otherwise ``None`` will be returned. Note that indexes 6,
7, and 8 of the result tuple are not usable.
+ .. versionchanged:: 3.12
+ For security reasons, addresses that were ambiguous and could parse into
+ multiple different addresses now cause ``('', '')`` to be returned
+ instead of only one of the *potential* addresses.
+
.. function:: parsedate_tz(date)
@@ -103,6 +108,25 @@ There are several useful utilities provi
.. versionadded:: 2.4
+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
+ is returned in its place. Other errors in parsing the list of
+ addresses such as a fieldvalue seemingly parsing into multiple
+ addresses may result in a list containing a single empty 2-tuple
+ ``[('', '')]`` being returned rather than returning potentially
+ invalid output.
+
+ Example malformed input parsing:
+
+ .. doctest::
+
+ >>> from email.utils import getaddresses
+ >>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
+ [('', '')]
+
+ .. versionchanged:: 3.12
+ The 2-tuple of ``('', '')`` in the returned values when parsing
+ fails were added as to address a security issue.
+
.. function:: make_msgid([idstring])
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -101,11 +101,56 @@ def formataddr(pair):
+def _pre_parse_validation(email_header_fields):
+ accepted_values = []
+ for v in email_header_fields:
+ s = v.replace('\\(', '').replace('\\)', '')
+ if s.count('(') != s.count(')'):
+ v = "('', '')"
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+ accepted_values = []
+ # The parser would have parsed a correctly formatted domain-literal
+ # The existence of an [ after parsing indicates a parsing failure
+ for v in parsed_email_header_tuples:
+ if '[' in v[1]:
+ v = ('', '')
+ accepted_values.append(v)
+
+ return accepted_values
+
+
+
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
- all = COMMASPACE.join(fieldvalues)
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+ its place.
+
+ If the resulting list of parsed address is not the same as the number of
+ fieldvalues in the input list a parsing error has occurred. A list
+ containing a single empty 2-tuple [('', '')] is returned in its place.
+ This is done to avoid invalid output.
+ """
+ fieldvalues = [str(v) for v in fieldvalues]
+ fieldvalues = _pre_parse_validation(fieldvalues)
+ all = COMMASPACE.join(v for v in fieldvalues)
a = _AddressList(all)
- return a.addresslist
+ result = _post_parse_validation(a.addresslist)
+
+ n = 0
+ for v in fieldvalues:
+ n += v.count(',') + 1
+
+ if len(result) != n:
+ return [('', '')]
+
+ return result
@@ -217,9 +262,18 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- addrs = _AddressList(addr).addresslist
- if not addrs:
- return '', ''
+ if isinstance(addr, list):
+ addr = addr[0]
+
+ if not isinstance(addr, str):
+ return ('', '')
+
+ addr = _pre_parse_validation([addr])[0]
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+ if not addrs or len(addrs) > 1:
+ return ('', '')
+
return addrs[0]
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -0,0 +1,4 @@
+CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
+and :func:`email.utils.getaddresses` from returning the realname portion of an
+invalid RFC2822 email header in the email address portion of the 2-tuple
+returned after being parsed by :class:`email._parseaddr.AddressList`.