2023-08-03 16:58:20 +02:00
|
|
|
---
|
2023-12-19 16:40:30 +01:00
|
|
|
Doc/library/email.utils.rst | 19 -
|
|
|
|
Lib/email/utils.py | 151 +++++++-
|
|
|
|
Lib/test/test_email/test_email.py | 187 +++++++++-
|
|
|
|
Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8
|
|
|
|
4 files changed, 344 insertions(+), 21 deletions(-)
|
2023-08-03 16:58:20 +02:00
|
|
|
|
2024-05-01 11:01:36 +02:00
|
|
|
--- a/Doc/library/email.utils.rst
|
|
|
|
+++ b/Doc/library/email.utils.rst
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -60,13 +60,18 @@ of the new API.
|
|
|
|
begins with angle brackets, they are stripped off.
|
|
|
|
|
|
|
|
|
|
|
|
-.. function:: parseaddr(address)
|
|
|
|
+.. function:: parseaddr(address, *, strict=True)
|
|
|
|
|
|
|
|
Parse address -- which should be the value of some address-containing field such
|
|
|
|
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
|
2023-08-03 16:58:20 +02:00
|
|
|
*email address* parts. Returns a tuple of that information, unless the parse
|
|
|
|
fails, in which case a 2-tuple of ``('', '')`` is returned.
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
+ If *strict* is true, use a strict parser which rejects malformed inputs.
|
|
|
|
+
|
|
|
|
+ .. versionchanged:: 3.13
|
|
|
|
+ Add *strict* optional parameter and reject malformed inputs by default.
|
2023-08-03 16:58:20 +02:00
|
|
|
+
|
|
|
|
|
|
|
|
.. function:: formataddr(pair, charset='utf-8')
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -84,12 +89,15 @@ of the new API.
|
|
|
|
Added the *charset* option.
|
|
|
|
|
|
|
|
|
|
|
|
-.. function:: getaddresses(fieldvalues)
|
|
|
|
+.. function:: getaddresses(fieldvalues, *, strict=True)
|
|
|
|
|
2023-08-03 16:58:20 +02:00
|
|
|
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
|
|
|
|
*fieldvalues* is a sequence of header field values as might be returned by
|
2023-12-19 16:40:30 +01:00
|
|
|
- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
|
2023-08-03 16:58:20 +02:00
|
|
|
- example that gets all the recipients of a message::
|
2023-12-19 16:40:30 +01:00
|
|
|
+ :meth:`Message.get_all <email.message.Message.get_all>`.
|
|
|
|
+
|
|
|
|
+ If *strict* is true, use a strict parser which rejects malformed inputs.
|
|
|
|
+
|
|
|
|
+ Here's a simple example that gets all the recipients of a message::
|
2023-08-03 16:58:20 +02:00
|
|
|
|
|
|
|
from email.utils import getaddresses
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -99,6 +107,9 @@ of the new API.
|
2023-08-03 16:58:20 +02:00
|
|
|
resent_ccs = msg.get_all('resent-cc', [])
|
|
|
|
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
+ .. versionchanged:: 3.13
|
|
|
|
+ Add *strict* optional parameter and reject malformed inputs by default.
|
2023-08-03 16:58:20 +02:00
|
|
|
+
|
|
|
|
|
|
|
|
.. function:: parsedate(date)
|
|
|
|
|
2024-05-01 11:01:36 +02:00
|
|
|
--- a/Lib/email/utils.py
|
|
|
|
+++ b/Lib/email/utils.py
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -48,6 +48,7 @@ TICK = "'"
|
|
|
|
specialsre = re.compile(r'[][\\()<>@,:;".]')
|
|
|
|
escapesre = re.compile(r'[\\"]')
|
|
|
|
|
|
|
|
+
|
|
|
|
def _has_surrogates(s):
|
Accepting request 1145174 from home:dgarcia:branches:devel:languages:python:Factory
- Update to 3.11.8:
- Security
- gh-113659: Skip .pth files with names starting with a dot or
hidden file attribute.
- Core and Builtins
- gh-114887: Changed socket type validation in
create_datagram_endpoint() to accept all non-stream sockets.
This fixes a regression in compatibility with raw sockets.
- gh-114388: Fix a RuntimeWarning emitted when assigning an
integer-like value that is not an instance of int to an
attribute that corresponds to a C struct member of type T_UINT
and T_ULONG. Fix a double RuntimeWarning emitted when assigning a
negative integer value to an attribute that corresponds to a C
struct member of type T_UINT.
- gh-89811: Check for a valid tp_version_tag before performing
bytecode specializations that rely on this value being usable.
- gh-113602: Fix an error that was causing the parser to try to
overwrite existing errors and crashing in the process. Patch by
Pablo Galindo
- gh-113566: Fix a 3.11-specific crash when the repr of a Future
is requested after the module has already been
garbage-collected.
- gh-106905: Use per AST-parser state rather than global state to
track recursion depth within the AST parser to prevent potential
race condition due to simultaneous parsing.
- The issue primarily showed up in 3.11 by multithreaded users of
ast.parse(). In 3.12 a change to when garbage collection can be
triggered prevented the race condition from occurring.
- gh-112716: Fix SystemError in the import statement and in
__reduce__() methods of builtin types when __builtins__ is not a
dict.
- gh-105967: Workaround a bug in Apple’s macOS platform zlib
library where zlib.crc32() and binascii.crc32() could produce
incorrect results on multi-gigabyte inputs. Including when using
zipfile on zips containing large data.
- gh-94606: Fix UnicodeEncodeError when
email.message.get_payload() reads a message with a Unicode
surrogate character and the message content is not well-formed
for surrogateescape encoding. Patch by Sidney Markowitz.
- Library
- gh-114965: Update bundled pip to 24.0
- gh-114959: tarfile no longer ignores errors when trying to
extract a directory on top of a file.
- gh-109475: Fix support of explicit option value “--” in argparse
(e.g. --option=--).
- gh-110190: Fix ctypes structs with array on Windows ARM64
platform by setting MAX_STRUCT_SIZE to 32 in stgdict. Patch by
Diego Russo
- gh-113280: Fix a leak of open socket in rare cases when error
occurred in ssl.SSLSocket creation.
- gh-77749: email.policy.EmailPolicy.fold() now always encodes
non-ASCII characters in headers if utf8 is false.
- gh-114492: Make the result of termios.tcgetattr() reproducible
on Alpine Linux. Previously it could leave a random garbage in
some fields.
- gh-75128: Ignore an OSError in
asyncio.BaseEventLoop.create_server() when IPv6 is available but
the interface cannot actually support it.
- gh-114257: Dismiss the FileNotFound error in
ctypes.util.find_library() and just return None on Linux.
- gh-101438: Avoid reference cycle in ElementTree.iterparse. The
iterator returned by ElementTree.iterparse may hold on to a file
descriptor. The reference cycle prevented prompt clean-up of the
file descriptor if the returned iterator was not exhausted.
- gh-104522: OSError raised when running a subprocess now only has
filename attribute set to cwd if the error was caused by a
failed attempt to change the current directory.
- gh-109534: Fix a reference leak in
asyncio.selector_events.BaseSelectorEventLoop when SSL
handshakes fail. Patch contributed by Jamie Phan.
- gh-114077: Fix possible OverflowError in
socket.socket.sendfile() when passing a count larger than 2 GiB on
32-bit platform.
- gh-114014: Fixed a bug in fractions.Fraction where an invalid
string using d in the decimals part creates a different error
compared to other invalid letters/characters. Patch by Jeremiah
Gabriel Pascual.
- gh-113951: Fix the behavior of tag_unbind() methods of
tkinter.Text and tkinter.Canvas classes with three arguments.
Previously, widget.tag_unbind(tag, sequence, funcid) destroyed
the current binding for sequence, leaving sequence unbound, and
deleted the funcid command. Now it removes only funcid from the
binding for sequence, keeping other commands, and deletes the
funcid command. It leaves sequence unbound only if funcid was
the last bound command.
- gh-113877: Fix tkinter method winfo_pathname() on 64-bit
Windows.
- gh-113781: Silence unraisable AttributeError when warnings are
emitted during Python finalization.
- gh-113594: Fix UnicodeEncodeError in email when re-folding lines
that contain unknown-8bit encoded part followed by
non-unknown-8bit encoded part.
- gh-113538: In asyncio.StreamReaderProtocol.connection_made(),
there is a callback that logs an error if the task wrapping the
“connected callback” fails. This callback would itself fail if
the task was cancelled. Prevent this by checking whether the
task was cancelled first. If so, close the transport but don’t
log an error.
- gh-85567: Fix resource warnings for unclosed files in pickle and
pickletools command line interfaces.
- gh-101225: Increase the backlog for
multiprocessing.connection.Listener objects created by
multiprocessing.manager and multiprocessing.resource_sharer to
significantly reduce the risk of getting a connection refused
error when creating a multiprocessing.connection.Connection to
them.
- gh-113543: Make sure that webbrowser.MacOSXOSAScript sends
webbrowser.open audit event.
- gh-113028: When a second reference to a string appears in the
input to pickle, and the Python implementation is in use, we are
guaranteed that a single copy gets pickled and a single object
is shared when reloaded. Previously, in protocol 0, when a
string contained certain characters (e.g. newline) it resulted
in duplicate objects.
- gh-113421: Fix multiprocessing logger for %(filename)s.
- gh-113358: Fix rendering tracebacks for exceptions with a broken
__getattr__.
- gh-113214: Fix an AttributeError during asyncio SSL protocol
aborts in SSL-over-SSL scenarios.
- gh-113246: Update bundled pip to 23.3.2.
- gh-113199: Make http.client.HTTPResponse.read1 and
http.client.HTTPResponse.readline close IO after reading all
data when content length is known. Patch by Illia Volochii.
- gh-113188: Fix shutil.copymode() and shutil.copystat() on
Windows. Previously they worked differently if dst is a symbolic
link: they modified the permission bits of dst itself rather
than the file it points to if follow_symlinks is true or src is
not a symbolic link, and did not modify the permission bits if
follow_symlinks is false and src is a symbolic link.
- gh-61648: Detect line numbers of properties in doctests.
- gh-112559: signal.signal() and signal.getsignal() no longer call
repr on callable handlers. asyncio.run() and
asyncio.Runner.run() no longer call repr on the task results.
Patch by Yilei Yang.
- gh-110190: Fix ctypes structs with array on PPC64LE platform by
setting MAX_STRUCT_SIZE to 64 in stgdict. Patch by Diego Russo.
- gh-79429: Ignore FileNotFoundError when removing a temporary
directory in the multiprocessing finalizer.
- gh-79325: Fix an infinite recursion error in
tempfile.TemporaryDirectory() cleanup on Windows.
- gh-110190: Fix ctypes structs with array on Arm platform by
setting MAX_STRUCT_SIZE to 32 in stgdict. Patch by Diego Russo.
- gh-81194: Fix a crash in socket.if_indextoname() with specific
value (UINT_MAX). Fix an integer overflow in
socket.if_indextoname() on 64-bit non-Windows platforms.
- gh-75666: Fix the behavior of tkinter widget’s unbind() method
with two arguments. Previously, widget.unbind(sequence, funcid)
destroyed the current binding for sequence, leaving sequence
unbound, and deleted the funcid command. Now it removes only
funcid from the binding for sequence, keeping other commands,
and deletes the funcid command. It leaves sequence unbound only
if funcid was the last bound command.
- gh-110345: Show the Tcl/Tk patchlevel (rather than version) in
tkinter._test().
- gh-109858: Protect zipfile from “quoted-overlap” zipbomb. It now
raises BadZipFile when trying to read an entry that overlaps with
another entry or the central directory.
- gh-38807: Fix race condition in trace. Instead of checking if a
directory exists and creating it, directly call os.makedirs()
with the kwarg exist_ok=True.
- gh-75705: Set unixfrom envelope in mailbox.mbox and
mailbox.MMDF.
- gh-105102: Allow ctypes.Union to be nested in ctypes.Structure
when the system endianness is the opposite of the classes.
- gh-104282: Fix null pointer dereference in
lzma._decode_filter_properties() due to improper handling of BCJ
filters with properties of zero length. Patch by Radislav
Chugunov.
- gh-102512: When os.fork() is called from a foreign thread (aka
_DummyThread), the type of the thread in a child process is
changed to _MainThread. Also changed its name and daemonic
status, it can be now joined.
- gh-91133: Fix a bug in tempfile.TemporaryDirectory cleanup,
which now no longer dereferences symlinks when working around
file system permission errors.
- bpo-43153: On Windows, tempfile.TemporaryDirectory previously
masked a PermissionError with NotADirectoryError during
directory cleanup. It now correctly raises PermissionError if
errors are not ignored. Patch by Andrei Kulakov and Ken Jin.
- bpo-35332: The shutil.rmtree() function now ignores errors when
calling os.close() when ignore_errors is True, and os.close() is no
longer retried after error.
- bpo-35928: io.TextIOWrapper now correctly handles the decoding
buffer after read() and write().
- bpo-26791: shutil.move() now moves a symlink into a directory
when that directory is the target of the symlink. This provides
the same behavior as the mv shell command. The previous behavior
raised an exception. Patch by Jeffrey Kintscher.
- bpo-36959: Fix some error messages for invalid ISO format string
combinations in strptime() that referred to directives not
contained in the format string. Patch by Gordon P. Hemsley.
- bpo-18060: Fixed a class inheritance issue that can cause
segfaults when deriving two or more levels of subclasses from a
base class of Structure or Union.
- Documentation
- gh-110746: Improved markup for valid options/values for methods
ttk.treeview.column and ttk.treeview.heading, and for Layouts.
- gh-95649: Document that the asyncio module contains code taken
from v0.16.0 of the uvloop project, as well as the required MIT
licensing information.
- Tests
- gh-109980: Fix test_tarfile_vs_tar in test_shutil for macOS,
where system tar can include more information in the archive
than shutil.make_archive.
- gh-112769: The tests now correctly compare zlib version when
zlib.ZLIB_RUNTIME_VERSION contains non-integer suffixes. For
example zlib-ng defines the version as 1.3.0.zlib-ng.
- gh-105089: Fix
test.test_zipfile.test_core.TestWithDirectory.test_create_directory_with_write
test in AIX by doing a bitwise AND of 0xFFFF on mode , so that
it will be in sync with zinfo.external_attr
- bpo-40648: Test modes that file can get with chmod() on Windows.
- Build
- gh-101778: Fix build error when there’s a dangling symlink in
the directory containing ffi.h.
- gh-112305: Fixed the check-clean-src step performed on out of
tree builds to detect errant $(srcdir)/Python/frozen_modules/*.h
files and recommend appropriate source tree cleanup steps to get
a working build again.
- bpo-11102: The os.major(), os.makedev(), and os.minor()
functions are now available on HP-UX v3.
- bpo-36351: Do not set ipv6type when cross-compiling.
- IDLE
- gh-96905: In idlelib code, stop redefining built-ins ‘dict’ and
‘object’.
- gh-72284: Improve the lists of features, editor key bindings,
and shell key bindings in the IDLE doc.
- gh-113903: Fix rare failure of test.test_idle, in
test_configdialog.
- gh-113729: Fix the “Help -> IDLE Doc” menu bug in 3.11.7 and
3.12.1.
- gh-113269: Fix test_editor hang on macOS Catalina.
- gh-112898: Fix processing unsaved files when quitting IDLE on
macOS.
- gh-103820: Revise IDLE bindings so that events from mouse button
4/5 on non-X11 windowing systems (i.e. Win32 and Aqua) are not
mistaken for scrolling.
- bpo-13586: Enter the selected text when opening the “Replace”
dialog.
- Tools/Demos
- gh-109991: Update GitHub CI workflows to use OpenSSL 3.0.13 and
multissltests to use 1.1.1w, 3.0.13, 3.1.5, and 3.2.1.
- gh-115015: Fix a bug in Argument Clinic that generated incorrect
code for methods with no parameters that use the METH_METHOD |
METH_FASTCALL | METH_KEYWORDS calling convention. Only the
positional parameter count was checked; any keyword argument
passed would be silently accepted.
- Refresh all patches:
- CVE-2023-27043-email-parsing-errors.patch
- F00251-change-user-install-location.patch
- bpo-31046_ensurepip_honours_prefix.patch
- distutils-reproducible-compile.patch
- fix_configure_rst.patch
- python-3.3.0b1-fix_date_time_compiler.patch
- python-3.3.0b1-localpath.patch
- python-3.3.0b1-test-posix_fadvise.patch
- skip_if_buildbot-extend.patch
- subprocess-raise-timeout.patch
- support-expat-CVE-2022-25236-patched.patch
OBS-URL: https://build.opensuse.org/request/show/1145174
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python311?expand=0&rev=99
2024-02-08 13:49:59 +01:00
|
|
|
"""Return True if s may contain surrogate-escaped binary data."""
|
2023-12-19 16:40:30 +01:00
|
|
|
# This check is based on the fact that unless there are surrogates, utf8
|
|
|
|
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
|
2023-08-03 16:58:20 +02:00
|
|
|
return address
|
|
|
|
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
+def _iter_escaped_chars(addr):
|
|
|
|
+ pos = 0
|
|
|
|
+ escape = False
|
|
|
|
+ for pos, ch in enumerate(addr):
|
|
|
|
+ if escape:
|
|
|
|
+ yield (pos, '\\' + ch)
|
|
|
|
+ escape = False
|
|
|
|
+ elif ch == '\\':
|
|
|
|
+ escape = True
|
|
|
|
+ else:
|
|
|
|
+ yield (pos, ch)
|
|
|
|
+ if escape:
|
|
|
|
+ yield (pos, '\\')
|
2023-08-03 16:58:20 +02:00
|
|
|
+
|
|
|
|
+
|
2023-12-19 16:40:30 +01:00
|
|
|
+def _strip_quoted_realnames(addr):
|
|
|
|
+ """Strip real names between quotes."""
|
|
|
|
+ if '"' not in addr:
|
|
|
|
+ # Fast path
|
|
|
|
+ return addr
|
2023-08-03 16:58:20 +02:00
|
|
|
+
|
2023-12-19 16:40:30 +01:00
|
|
|
+ start = 0
|
|
|
|
+ open_pos = None
|
|
|
|
+ result = []
|
|
|
|
+ for pos, ch in _iter_escaped_chars(addr):
|
|
|
|
+ if ch == '"':
|
|
|
|
+ if open_pos is None:
|
|
|
|
+ open_pos = pos
|
|
|
|
+ else:
|
|
|
|
+ if start != open_pos:
|
|
|
|
+ result.append(addr[start:open_pos])
|
|
|
|
+ start = pos + 1
|
|
|
|
+ open_pos = None
|
2023-08-03 16:58:20 +02:00
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
-def getaddresses(fieldvalues):
|
2023-08-03 16:58:20 +02:00
|
|
|
- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
|
|
|
|
- all = COMMASPACE.join(str(v) for v in fieldvalues)
|
2023-12-19 16:40:30 +01:00
|
|
|
- a = _AddressList(all)
|
|
|
|
- return a.addresslist
|
|
|
|
+ if start < len(addr):
|
|
|
|
+ result.append(addr[start:])
|
|
|
|
+
|
|
|
|
+ return ''.join(result)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+supports_strict_parsing = True
|
|
|
|
+
|
|
|
|
+def getaddresses(fieldvalues, *, strict=True):
|
2023-08-03 16:58:20 +02:00
|
|
|
+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
|
|
|
|
+
|
|
|
|
+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
|
|
|
|
+ its place.
|
|
|
|
+
|
2023-12-19 16:40:30 +01:00
|
|
|
+ If strict is true, use a strict parser which rejects malformed inputs.
|
2023-08-03 16:58:20 +02:00
|
|
|
+ """
|
2023-12-19 16:40:30 +01:00
|
|
|
+
|
|
|
|
+ # If strict is true, if the resulting list of parsed addresses is greater
|
|
|
|
+ # than the number of fieldvalues in the input list, a parsing error has
|
|
|
|
+ # occurred and consequently a list containing a single empty 2-tuple [('',
|
|
|
|
+ # '')] is returned in its place. This is done to avoid invalid output.
|
|
|
|
+ #
|
|
|
|
+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
|
|
|
|
+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
|
|
|
|
+ # Safe output: [('', '')]
|
|
|
|
+
|
|
|
|
+ if not strict:
|
|
|
|
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
|
|
|
|
+ a = _AddressList(all)
|
|
|
|
+ return a.addresslist
|
|
|
|
+
|
2023-08-03 16:58:20 +02:00
|
|
|
+ fieldvalues = [str(v) for v in fieldvalues]
|
|
|
|
+ fieldvalues = _pre_parse_validation(fieldvalues)
|
2023-12-19 16:40:30 +01:00
|
|
|
+ addr = COMMASPACE.join(fieldvalues)
|
|
|
|
+ a = _AddressList(addr)
|
2023-08-03 16:58:20 +02:00
|
|
|
+ result = _post_parse_validation(a.addresslist)
|
|
|
|
+
|
2023-12-19 16:40:30 +01:00
|
|
|
+ # Treat output as invalid if the number of addresses is not equal to the
|
|
|
|
+ # expected number of addresses.
|
2023-08-03 16:58:20 +02:00
|
|
|
+ n = 0
|
|
|
|
+ for v in fieldvalues:
|
2023-12-19 16:40:30 +01:00
|
|
|
+ # When a comma is used in the Real Name part it is not a delimiter.
|
|
|
|
+ # So strip those out before counting the commas.
|
|
|
|
+ v = _strip_quoted_realnames(v)
|
|
|
|
+ # Expected number of addresses: 1 + number of commas
|
|
|
|
+ n += 1 + v.count(',')
|
2023-08-03 16:58:20 +02:00
|
|
|
+ if len(result) != n:
|
|
|
|
+ return [('', '')]
|
|
|
|
+
|
|
|
|
+ return result
|
2023-12-19 16:40:30 +01:00
|
|
|
+
|
|
|
|
+
|
|
|
|
+def _check_parenthesis(addr):
|
|
|
|
+ # Ignore parenthesis in quoted real names.
|
|
|
|
+ addr = _strip_quoted_realnames(addr)
|
|
|
|
+
|
|
|
|
+ opens = 0
|
|
|
|
+ for pos, ch in _iter_escaped_chars(addr):
|
|
|
|
+ if ch == '(':
|
|
|
|
+ opens += 1
|
|
|
|
+ elif ch == ')':
|
|
|
|
+ opens -= 1
|
|
|
|
+ if opens < 0:
|
|
|
|
+ return False
|
|
|
|
+ return (opens == 0)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def _pre_parse_validation(email_header_fields):
|
|
|
|
+ accepted_values = []
|
|
|
|
+ for v in email_header_fields:
|
|
|
|
+ if not _check_parenthesis(v):
|
|
|
|
+ v = "('', '')"
|
|
|
|
+ accepted_values.append(v)
|
|
|
|
+
|
|
|
|
+ return accepted_values
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def _post_parse_validation(parsed_email_header_tuples):
|
|
|
|
+ accepted_values = []
|
|
|
|
+ # The parser would have parsed a correctly formatted domain-literal
|
|
|
|
+ # The existence of an [ after parsing indicates a parsing failure
|
|
|
|
+ for v in parsed_email_header_tuples:
|
|
|
|
+ if '[' in v[1]:
|
|
|
|
+ v = ('', '')
|
|
|
|
+ accepted_values.append(v)
|
|
|
|
+
|
|
|
|
+ return accepted_values
|
2023-08-03 16:58:20 +02:00
|
|
|
|
|
|
|
|
|
|
|
def _format_timetuple_and_zone(timetuple, zone):
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
|
|
|
|
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
|
|
|
|
|
|
|
|
|
|
|
-def parseaddr(addr):
|
|
|
|
+def parseaddr(addr, *, strict=True):
|
|
|
|
"""
|
|
|
|
Parse addr into its constituent realname and email address parts.
|
|
|
|
|
2023-08-03 16:58:20 +02:00
|
|
|
Return a tuple of realname and email address, unless the parse fails, in
|
|
|
|
which case return a 2-tuple of ('', '').
|
2023-12-19 16:40:30 +01:00
|
|
|
+
|
|
|
|
+ If strict is True, use a strict parser which rejects malformed inputs.
|
2023-08-03 16:58:20 +02:00
|
|
|
"""
|
|
|
|
- addrs = _AddressList(addr).addresslist
|
|
|
|
- if not addrs:
|
|
|
|
- return '', ''
|
2023-12-19 16:40:30 +01:00
|
|
|
+ if not strict:
|
|
|
|
+ addrs = _AddressList(addr).addresslist
|
|
|
|
+ if not addrs:
|
|
|
|
+ return ('', '')
|
|
|
|
+ return addrs[0]
|
|
|
|
+
|
2023-08-03 16:58:20 +02:00
|
|
|
+ if isinstance(addr, list):
|
|
|
|
+ addr = addr[0]
|
|
|
|
+
|
|
|
|
+ if not isinstance(addr, str):
|
|
|
|
+ return ('', '')
|
|
|
|
+
|
|
|
|
+ addr = _pre_parse_validation([addr])[0]
|
|
|
|
+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
|
|
|
|
+
|
|
|
|
+ if not addrs or len(addrs) > 1:
|
|
|
|
+ return ('', '')
|
|
|
|
+
|
|
|
|
return addrs[0]
|
|
|
|
|
|
|
|
|
2024-05-01 11:01:36 +02:00
|
|
|
--- a/Lib/test/test_email/test_email.py
|
|
|
|
+++ b/Lib/test/test_email/test_email.py
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -17,6 +17,7 @@ from unittest.mock import patch
|
|
|
|
|
|
|
|
import email
|
|
|
|
import email.policy
|
|
|
|
+import email.utils
|
|
|
|
|
|
|
|
from email.charset import Charset
|
|
|
|
from email.generator import Generator, DecodedGenerator, BytesGenerator
|
2024-05-01 11:01:36 +02:00
|
|
|
@@ -3336,15 +3337,137 @@ Foo
|
2023-08-03 16:58:20 +02:00
|
|
|
[('Al Person', 'aperson@dom.ain'),
|
|
|
|
('Bud Person', 'bperson@dom.ain')])
|
|
|
|
|
2023-12-19 16:40:30 +01:00
|
|
|
+ def test_parsing_errors(self):
|
|
|
|
+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
|
|
|
|
+ alice = 'alice@example.org'
|
|
|
|
+ bob = 'bob@example.com'
|
|
|
|
+ empty = ('', '')
|
|
|
|
+
|
|
|
|
+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
|
|
|
|
+ # addresses: default behavior (strict=True) rejects malformed address,
|
|
|
|
+ # and strict=False which tolerates malformed address.
|
|
|
|
+ for invalid_separator, expected_non_strict in (
|
|
|
|
+ ('(', [(f'<{bob}>', alice)]),
|
|
|
|
+ (')', [('', alice), empty, ('', bob)]),
|
|
|
|
+ ('<', [('', alice), empty, ('', bob), empty]),
|
|
|
|
+ ('>', [('', alice), empty, ('', bob)]),
|
|
|
|
+ ('[', [('', f'{alice}[<{bob}>]')]),
|
|
|
|
+ (']', [('', alice), empty, ('', bob)]),
|
|
|
|
+ ('@', [empty, empty, ('', bob)]),
|
|
|
|
+ (';', [('', alice), empty, ('', bob)]),
|
|
|
|
+ (':', [('', alice), ('', bob)]),
|
|
|
|
+ ('.', [('', alice + '.'), ('', bob)]),
|
|
|
|
+ ('"', [('', alice), ('', f'<{bob}>')]),
|
|
|
|
+ ):
|
|
|
|
+ address = f'{alice}{invalid_separator}<{bob}>'
|
|
|
|
+ with self.subTest(address=address):
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]),
|
|
|
|
+ [empty])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
|
|
|
+ expected_non_strict)
|
|
|
|
+
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]),
|
|
|
|
+ empty)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Comma (',') is treated differently depending on strict parameter.
|
|
|
|
+ # Comma without quotes.
|
|
|
|
+ address = f'{alice},<{bob}>'
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]),
|
|
|
|
+ [('', alice), ('', bob)])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
|
|
|
+ [('', alice), ('', bob)])
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]),
|
|
|
|
+ empty)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Real name between quotes containing comma.
|
|
|
|
+ address = '"Alice, alice@example.org" <bob@example.com>'
|
|
|
|
+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Valid parenthesis in comments.
|
|
|
|
+ address = 'alice@example.org (Alice)'
|
|
|
|
+ expected_strict = ('Alice', 'alice@example.org')
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]), expected_strict)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Invalid parenthesis in comments.
|
|
|
|
+ address = 'alice@example.org )Alice('
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]), [empty])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
|
|
|
+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]), empty)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Two addresses with quotes separated by comma.
|
|
|
|
+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
|
|
|
|
+ self.assertEqual(utils.getaddresses([address]),
|
|
|
|
+ [('Jane Doe', 'jane@example.net'),
|
|
|
|
+ ('John Doe', 'john@example.net')])
|
|
|
|
+ self.assertEqual(utils.getaddresses([address], strict=False),
|
|
|
|
+ [('Jane Doe', 'jane@example.net'),
|
|
|
|
+ ('John Doe', 'john@example.net')])
|
|
|
|
+ self.assertEqual(utils.parseaddr([address]), empty)
|
|
|
|
+ self.assertEqual(utils.parseaddr([address], strict=False),
|
|
|
|
+ ('', address))
|
|
|
|
+
|
|
|
|
+ # Test email.utils.supports_strict_parsing attribute
|
|
|
|
+ self.assertEqual(email.utils.supports_strict_parsing, True)
|
2023-08-03 16:58:20 +02:00
|
|
|
+
|
|
|
|
def test_getaddresses_nasty(self):
|
2023-12-19 16:40:30 +01:00
|
|
|
- eq = self.assertEqual
|
|
|
|
- eq(utils.getaddresses(['foo: ;']), [('', '')])
|
2023-08-03 16:58:20 +02:00
|
|
|
- eq(utils.getaddresses(
|
|
|
|
- ['[]*-- =~$']),
|
|
|
|
- [('', ''), ('', ''), ('', '*--')])
|
2023-12-19 16:40:30 +01:00
|
|
|
- eq(utils.getaddresses(
|
|
|
|
- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
|
|
|
|
- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
|
|
|
|
+ for addresses, expected in (
|
|
|
|
+ (['"Sürname, Firstname" <to@example.com>'],
|
|
|
|
+ [('Sürname, Firstname', 'to@example.com')]),
|
|
|
|
+
|
|
|
|
+ (['foo: ;'],
|
|
|
|
+ [('', '')]),
|
|
|
|
+
|
|
|
|
+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
|
|
|
|
+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
|
|
|
|
+
|
|
|
|
+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
|
|
|
|
+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
|
|
|
|
+
|
|
|
|
+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
|
|
|
|
+ [('', '')]),
|
|
|
|
+
|
|
|
|
+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
|
|
|
|
+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
|
|
|
|
+
|
|
|
|
+ (['John Doe <jdoe@machine(comment). example>'],
|
|
|
|
+ [('John Doe (comment)', 'jdoe@machine.example')]),
|
|
|
|
+
|
|
|
|
+ (['"Mary Smith: Personal Account" <smith@home.example>'],
|
|
|
|
+ [('Mary Smith: Personal Account', 'smith@home.example')]),
|
|
|
|
+
|
|
|
|
+ (['Undisclosed recipients:;'],
|
|
|
|
+ [('', '')]),
|
|
|
|
+
|
|
|
|
+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
|
|
|
|
+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
|
|
|
|
+ ):
|
|
|
|
+ with self.subTest(addresses=addresses):
|
|
|
|
+ self.assertEqual(utils.getaddresses(addresses),
|
|
|
|
+ expected)
|
|
|
|
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
|
|
|
|
+ expected)
|
|
|
|
+
|
|
|
|
+ addresses = ['[]*-- =~$']
|
|
|
|
+ self.assertEqual(utils.getaddresses(addresses),
|
|
|
|
+ [('', '')])
|
|
|
|
+ self.assertEqual(utils.getaddresses(addresses, strict=False),
|
|
|
|
+ [('', ''), ('', ''), ('', '*--')])
|
2023-08-03 16:58:20 +02:00
|
|
|
|
|
|
|
def test_getaddresses_embedded_comment(self):
|
|
|
|
"""Test proper handling of a nested comment"""
|
2024-05-01 11:01:36 +02:00
|
|
|
@@ -3535,6 +3658,54 @@ multipart/report
|
2023-12-19 16:40:30 +01:00
|
|
|
m = cls(*constructor, policy=email.policy.default)
|
|
|
|
self.assertIs(m.policy, email.policy.default)
|
|
|
|
|
|
|
|
+ def test_iter_escaped_chars(self):
|
|
|
|
+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
|
|
|
|
+ [(0, 'a'),
|
|
|
|
+ (2, '\\\\'),
|
|
|
|
+ (3, 'b'),
|
|
|
|
+ (5, '\\"'),
|
|
|
|
+ (6, 'c'),
|
|
|
|
+ (8, '\\\\'),
|
|
|
|
+ (9, '"'),
|
|
|
|
+ (10, 'd')])
|
|
|
|
+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
|
|
|
|
+ [(0, 'a'), (1, '\\')])
|
|
|
|
+
|
|
|
|
+ def test_strip_quoted_realnames(self):
|
|
|
|
+ def check(addr, expected):
|
|
|
|
+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
|
|
|
|
+
|
|
|
|
+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
|
|
|
|
+ ' <jane@example.net>, <john@example.net>')
|
|
|
|
+ check(r'"Jane \"Doe\"." <jane@example.net>',
|
|
|
|
+ ' <jane@example.net>')
|
|
|
|
+
|
|
|
|
+ # special cases
|
|
|
|
+ check(r'before"name"after', 'beforeafter')
|
|
|
|
+ check(r'before"name"', 'before')
|
|
|
|
+ check(r'b"name"', 'b') # single char
|
|
|
|
+ check(r'"name"after', 'after')
|
|
|
|
+ check(r'"name"a', 'a') # single char
|
|
|
|
+ check(r'"name"', '')
|
|
|
|
+
|
|
|
|
+ # no change
|
|
|
|
+ for addr in (
|
|
|
|
+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
|
|
|
|
+ 'lone " quote',
|
|
|
|
+ ):
|
|
|
|
+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ def test_check_parenthesis(self):
|
|
|
|
+ addr = 'alice@example.net'
|
|
|
|
+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
|
|
|
|
+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
|
|
|
|
+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
|
|
|
|
+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
|
|
|
|
+
|
|
|
|
+ # Ignore real name between quotes
|
|
|
|
+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
|
|
|
|
+
|
|
|
|
|
|
|
|
# Test the iterator/generators
|
|
|
|
class TestIterators(TestEmailBase):
|
2023-08-03 16:58:20 +02:00
|
|
|
--- /dev/null
|
2024-05-01 11:01:36 +02:00
|
|
|
+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
|
2023-12-19 16:40:30 +01:00
|
|
|
@@ -0,0 +1,8 @@
|
|
|
|
+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
|
|
|
|
+return ``('', '')`` 2-tuples in more situations where invalid email
|
|
|
|
+addresses are encountered instead of potentially inaccurate values. Add
|
|
|
|
+optional *strict* parameter to these two functions: use ``strict=False`` to
|
|
|
|
+get the old behavior, accept malformed inputs.
|
|
|
|
+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
|
|
|
|
+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
|
|
|
|
+Stinner to improve the CVE-2023-27043 fix.
|