diff --git a/CVE-2023-27043-email-parsing-errors.patch b/CVE-2023-27043-email-parsing-errors.patch index 873514a..df77785 100644 --- a/CVE-2023-27043-email-parsing-errors.patch +++ b/CVE-2023-27043-email-parsing-errors.patch @@ -1,74 +1,185 @@ --- - Doc/library/email.utils.rst | 26 +++ - Lib/email/utils.py | 63 +++++++ - Lib/test/test_email/test_email.py | 81 +++++++++- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 - 4 files changed, 164 insertions(+), 10 deletions(-) + Doc/library/email.utils.rst | 19 - + Lib/email/utils.py | 151 +++++++- + Lib/test/test_email/test_email.py | 187 +++++++++- + Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + 4 files changed, 344 insertions(+), 21 deletions(-) -Index: Python-3.10.12/Doc/library/email.utils.rst -=================================================================== ---- Python-3.10.12.orig/Doc/library/email.utils.rst -+++ Python-3.10.12/Doc/library/email.utils.rst -@@ -67,6 +67,11 @@ of the new API. +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. -+ .. versionchanged:: 3.12 -+ For security reasons, addresses that were ambiguous and could parse into -+ multiple different addresses now cause ``('', '')`` to be returned -+ instead of only one of the *potential* addresses. ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. 
function:: formataddr(pair, charset='utf-8') -@@ -89,7 +94,7 @@ of the new API. +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple +- :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: -+ example that gets all the recipients of a message: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses -@@ -99,6 +104,25 @@ of the new API. +@@ -99,6 +107,9 @@ of the new API. resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) -+ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -+ is returned in its place. Other errors in parsing the list of -+ addresses such as a fieldvalue seemingly parsing into multiple -+ addresses may result in a list containing a single empty 2-tuple -+ ``[('', '')]`` being returned rather than returning potentially -+ invalid output. -+ -+ Example malformed input parsing: -+ -+ .. doctest:: -+ -+ >>> from email.utils import getaddresses -+ >>> getaddresses(['alice@example.com ', 'me@example.com']) -+ [('', '')] -+ -+ .. versionchanged:: 3.12 -+ The 2-tuple of ``('', '')`` in the returned values when parsing -+ fails were added as to address a security issue. ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. + .. 
function:: parsedate(date) -Index: Python-3.10.12/Lib/email/utils.py -=================================================================== ---- Python-3.10.12.orig/Lib/email/utils.py -+++ Python-3.10.12/Lib/email/utils.py -@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'): +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -48,6 +48,7 @@ TICK = "'" + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): return address ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) ++ ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. 
++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. 
++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: -+ s = v.replace('\\(', '').replace('\\)', '') -+ if s.count('(') != s.count(')'): ++ if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + @@ -85,46 +196,32 @@ Index: Python-3.10.12/Lib/email/utils.py + accepted_values.append(v) + + return accepted_values -+ - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -- all = COMMASPACE.join(str(v) for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -+ -+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -+ its place. -+ -+ If the resulting list of parsed address is not the same as the number of -+ fieldvalues in the input list a parsing error has occurred. A list -+ containing a single empty 2-tuple [('', '')] is returned in its place. -+ This is done to avoid invalid output. -+ """ -+ fieldvalues = [str(v) for v in fieldvalues] -+ fieldvalues = _pre_parse_validation(fieldvalues) -+ all = COMMASPACE.join(v for v in fieldvalues) - a = _AddressList(all) -- return a.addresslist -+ result = _post_parse_validation(a.addresslist) -+ -+ n = 0 -+ for v in fieldvalues: -+ n += v.count(',') + 1 -+ -+ if len(result) != n: -+ return [('', '')] -+ -+ return result def _format_timetuple_and_zone(timetuple, zone): -@@ -212,9 +254,18 @@ def parseaddr(addr): +@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. 
+ Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ + if isinstance(addr, list): + addr = addr[0] + @@ -140,110 +237,225 @@ Index: Python-3.10.12/Lib/email/utils.py return addrs[0] -Index: Python-3.10.12/Lib/test/test_email/test_email.py -=================================================================== ---- Python-3.10.12.orig/Lib/test/test_email/test_email.py -+++ Python-3.10.12/Lib/test/test_email/test_email.py -@@ -3288,15 +3288,90 @@ Foo +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -16,6 +16,7 @@ from unittest.mock import patch + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.generator import Generator, DecodedGenerator, BytesGenerator +@@ -3288,15 +3289,137 @@ Foo [('Al Person', 'aperson@dom.ain'), ('Bud Person', 'bperson@dom.ain')]) -+ def test_getaddresses_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.getaddresses(['alice@example.org(']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org)']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org<']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org>']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org@']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org,']), -+ [('', 'alice@example.org'), ('', 'bob@example.com')]) -+ eq(utils.getaddresses(['alice@example.org;']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org:']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org.']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org"']), -+ 
[('', '')]) -+ eq(utils.getaddresses(['alice@example.org[']), -+ [('', '')]) -+ eq(utils.getaddresses(['alice@example.org]']), -+ [('', '')]) ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') + -+ def test_parseaddr_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043""" -+ eq = self.assertEqual -+ eq(utils.parseaddr(['alice@example.org(']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org)']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org<']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org>']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org@']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org,']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org;']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org:']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org.']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org"']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org[']), -+ ('', '')) -+ eq(utils.parseaddr(['alice@example.org]']), -+ ('', '')) ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. 
++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. 
++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) + def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) -+ eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]) -+ eq(utils.getaddresses( -+ [r'Pete(A nice \) chap) ']), -+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -+ eq(utils.getaddresses( -+ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -+ eq(utils.getaddresses( -+ ['John Doe ']), -+ [('John Doe (comment)', 'jdoe@machine.example')]) -+ eq(utils.getaddresses( -+ ['"Mary Smith: Personal Account" ']), -+ [('Mary Smith: Personal Account', 'smith@home.example')]) -+ eq(utils.getaddresses( -+ ['Undisclosed recipients:;']), -+ [('', '')]) -+ eq(utils.getaddresses( -+ [r', "Giant; \"Big\" Box" ']), -+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . 
example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" -Index: Python-3.10.12/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -=================================================================== +@@ -3485,6 +3608,54 @@ multipart/report + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." 
', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): --- /dev/null -+++ Python-3.10.12/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -0,0 +1,4 @@ -+CVE-2023-27043: Prevent :func:`email.utils.parseaddr` -+and :func:`email.utils.getaddresses` from returning the realname portion of an -+invalid RFC2822 email header in the email address portion of the 2-tuple -+returned after being parsed by :class:`email._parseaddr.AddressList`. ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check ++if the *strict* parameter is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix. 
diff --git a/Revert-gh105127-left-tests.patch b/Revert-gh105127-left-tests.patch deleted file mode 100644 index 74a92c2..0000000 --- a/Revert-gh105127-left-tests.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 4288c623d62cf90d8e4444facb3379fb06d01140 Mon Sep 17 00:00:00 2001 -From: "Gregory P. Smith" -Date: Thu, 20 Jul 2023 20:30:52 -0700 -Subject: [PATCH] [3.12] gh-106669: Revert "gh-102988: Detect email address - parsing errors ... (GH-105127)" (GH-106733) - -This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00. -Adds a regression test from the issue. - -See https://github.com/python/cpython/issues/106669.. -(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d) - -Co-authored-by: Gregory P. Smith ---- - Doc/library/email.utils.rst | 26 -- - Lib/email/utils.py | 63 ------ - Lib/test/test_email/test_email.py | 96 +--------- - Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 5 - 4 files changed, 31 insertions(+), 159 deletions(-) - create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst - -Index: Python-3.10.13/Doc/library/email.utils.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/email.utils.rst -+++ Python-3.10.13/Doc/library/email.utils.rst -@@ -67,11 +67,6 @@ of the new API. - *email address* parts. Returns a tuple of that information, unless the parse - fails, in which case a 2-tuple of ``('', '')`` is returned. - -- .. versionchanged:: 3.12 -- For security reasons, addresses that were ambiguous and could parse into -- multiple different addresses now cause ``('', '')`` to be returned -- instead of only one of the *potential* addresses. -- - - .. function:: formataddr(pair, charset='utf-8') - -@@ -94,7 +89,7 @@ of the new API. - This method returns a list of 2-tuples of the form returned by ``parseaddr()``. - *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. 
Here's a simple -- example that gets all the recipients of a message: -+ example that gets all the recipients of a message:: - - from email.utils import getaddresses - -@@ -104,25 +99,6 @@ of the new API. - resent_ccs = msg.get_all('resent-cc', []) - all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) - -- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` -- is returned in its place. Other errors in parsing the list of -- addresses such as a fieldvalue seemingly parsing into multiple -- addresses may result in a list containing a single empty 2-tuple -- ``[('', '')]`` being returned rather than returning potentially -- invalid output. -- -- Example malformed input parsing: -- -- .. doctest:: -- -- >>> from email.utils import getaddresses -- >>> getaddresses(['alice@example.com ', 'me@example.com']) -- [('', '')] -- -- .. versionchanged:: 3.12 -- The 2-tuple of ``('', '')`` in the returned values when parsing -- fails were added as to address a security issue. -- - - .. 
function:: parsedate(date) - -Index: Python-3.10.13/Lib/email/utils.py -=================================================================== ---- Python-3.10.13.orig/Lib/email/utils.py -+++ Python-3.10.13/Lib/email/utils.py -@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'): - return address - - --def _pre_parse_validation(email_header_fields): -- accepted_values = [] -- for v in email_header_fields: -- s = v.replace('\\(', '').replace('\\)', '') -- if s.count('(') != s.count(')'): -- v = "('', '')" -- accepted_values.append(v) -- -- return accepted_values -- -- --def _post_parse_validation(parsed_email_header_tuples): -- accepted_values = [] -- # The parser would have parsed a correctly formatted domain-literal -- # The existence of an [ after parsing indicates a parsing failure -- for v in parsed_email_header_tuples: -- if '[' in v[1]: -- v = ('', '') -- accepted_values.append(v) -- -- return accepted_values -- - - def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -- -- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -- its place. -- -- If the resulting list of parsed address is not the same as the number of -- fieldvalues in the input list a parsing error has occurred. A list -- containing a single empty 2-tuple [('', '')] is returned in its place. -- This is done to avoid invalid output. 
-- """ -- fieldvalues = [str(v) for v in fieldvalues] -- fieldvalues = _pre_parse_validation(fieldvalues) -- all = COMMASPACE.join(v for v in fieldvalues) -+ """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -+ all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) -- result = _post_parse_validation(a.addresslist) -- -- n = 0 -- for v in fieldvalues: -- n += v.count(',') + 1 -- -- if len(result) != n: -- return [('', '')] -- -- return result -+ return a.addresslist - - - def _format_timetuple_and_zone(timetuple, zone): -@@ -254,18 +212,9 @@ def parseaddr(addr): - Return a tuple of realname and email address, unless the parse fails, in - which case return a 2-tuple of ('', ''). - """ -- if isinstance(addr, list): -- addr = addr[0] -- -- if not isinstance(addr, str): -- return ('', '') -- -- addr = _pre_parse_validation([addr])[0] -- addrs = _post_parse_validation(_AddressList(addr).addresslist) -- -- if not addrs or len(addrs) > 1: -- return ('', '') -- -+ addrs = _AddressList(addr).addresslist -+ if not addrs: -+ return '', '' - return addrs[0] - - -Index: Python-3.10.13/Lib/test/test_email/test_email.py -=================================================================== ---- Python-3.10.13.orig/Lib/test/test_email/test_email.py -+++ Python-3.10.13/Lib/test/test_email/test_email.py -@@ -3288,90 +3288,32 @@ Foo - [('Al Person', 'aperson@dom.ain'), - ('Bud Person', 'bperson@dom.ain')]) - -- def test_getaddresses_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.getaddresses(['alice@example.org(']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org)']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org<']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org>']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org@']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org,']), -- [('', 'alice@example.org'), ('', 'bob@example.com')]) 
-- eq(utils.getaddresses(['alice@example.org;']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org:']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org.']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org"']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org[']), -- [('', '')]) -- eq(utils.getaddresses(['alice@example.org]']), -- [('', '')]) -- -- def test_parseaddr_parsing_errors(self): -- """Test for parsing errors from CVE-2023-27043""" -- eq = self.assertEqual -- eq(utils.parseaddr(['alice@example.org(']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org)']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org<']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org>']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org@']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org,']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org;']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org:']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org.']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org"']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org[']), -- ('', '')) -- eq(utils.parseaddr(['alice@example.org]']), -- ('', '')) -+ def test_getaddresses_comma_in_name(self): -+ """GH-106669 regression test.""" -+ self.assertEqual( -+ utils.getaddresses( -+ [ -+ '"Bud, Person" ', -+ 'aperson@dom.ain (Al Person)', -+ '"Mariusz Felisiak" ', -+ ] -+ ), -+ [ -+ ('Bud, Person', 'bperson@dom.ain'), -+ ('Al Person', 'aperson@dom.ain'), -+ ('Mariusz Felisiak', 'to@example.com'), -+ ], -+ ) - - def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) -- eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) -+ eq(utils.getaddresses( -+ ['[]*-- =~$']), -+ [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]) -- eq(utils.getaddresses( -- [r'Pete(A nice \) chap) ']), -- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) -- eq(utils.getaddresses( -- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), -- [('', '')]) -- eq(utils.getaddresses( -- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), -- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) -- eq(utils.getaddresses( -- ['John Doe ']), -- [('John Doe (comment)', 'jdoe@machine.example')]) -- eq(utils.getaddresses( -- ['"Mary Smith: Personal Account" ']), -- [('Mary Smith: Personal Account', 'smith@home.example')]) -- eq(utils.getaddresses( -- ['Undisclosed recipients:;']), -- [('', '')]) -- eq(utils.getaddresses( -- [r', "Giant; \"Big\" Box" ']), -- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) - - def test_getaddresses_embedded_comment(self): - """Test proper handling of a nested comment""" -Index: Python-3.10.13/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -=================================================================== ---- Python-3.10.13.orig/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -+++ Python-3.10.13/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst -@@ -1,3 +1,8 @@ -+Reverted the :mod:`email.utils` security improvement change released in -+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail -+to parse email addresses with a comma in the quoted name field. -+See :gh:`106669`. 
-+ - CVE-2023-27043: Prevent :func:`email.utils.parseaddr` - and :func:`email.utils.getaddresses` from returning the realname portion of an - invalid RFC2822 email header in the email address portion of the 2-tuple diff --git a/fix-sphinx-72.patch b/fix-sphinx-72.patch index 22d2366..dc3d130 100644 --- a/fix-sphinx-72.patch +++ b/fix-sphinx-72.patch @@ -1,66 +1,172 @@ -Index: Python-3.10.13/Doc/conf.py -=================================================================== ---- Python-3.10.13.orig/Doc/conf.py -+++ Python-3.10.13/Doc/conf.py -@@ -61,6 +61,11 @@ smartquotes_excludes = { - # Avoid a warning with Sphinx >= 2.0 - master_doc = 'contents' +--- + Doc/c-api/bytearray.rst | 2 + Doc/c-api/bytes.rst | 2 + Doc/c-api/capsule.rst | 2 + Doc/c-api/complex.rst | 2 + Doc/c-api/concrete.rst | 6 - + Doc/c-api/dict.rst | 4 + Doc/c-api/exceptions.rst | 6 - + Doc/c-api/file.rst | 2 + Doc/c-api/float.rst | 2 + Doc/c-api/function.rst | 2 + Doc/c-api/import.rst | 4 + Doc/c-api/init.rst | 14 +-- + Doc/c-api/intro.rst | 8 - + Doc/c-api/list.rst | 6 - + Doc/c-api/long.rst | 4 + Doc/c-api/mapping.rst | 2 + Doc/c-api/memoryview.rst | 2 + Doc/c-api/method.rst | 4 + Doc/c-api/module.rst | 2 + Doc/c-api/none.rst | 2 + Doc/c-api/number.rst | 12 +- + Doc/c-api/object.rst | 12 +- + Doc/c-api/sequence.rst | 4 + Doc/c-api/set.rst | 6 - + Doc/c-api/structures.rst | 4 + Doc/c-api/tuple.rst | 2 + Doc/c-api/type.rst | 2 + Doc/c-api/typeobj.rst | 4 + Doc/conf.py | 5 + + Doc/extending/newtypes.rst | 2 + Doc/library/_thread.rst | 2 + Doc/library/binascii.rst | 6 - + Doc/library/cmath.rst | 2 + Doc/library/copy.rst | 2 + Doc/library/copyreg.rst | 4 + Doc/library/dis.rst | 2 + Doc/library/exceptions.rst | 10 +- + Doc/library/fnmatch.rst | 4 + Doc/library/functions.rst | 10 +- + Doc/library/http.client.rst | 2 + Doc/library/imp.rst | 2 + Doc/library/internet.rst | 2 + Doc/library/locale.rst | 4 + Doc/library/marshal.rst | 4 + Doc/library/os.path.rst | 2 + Doc/library/os.rst | 4 + Doc/library/pdb.rst 
| 4 + Doc/library/posix.rst | 2 + Doc/library/pprint.rst | 4 + Doc/library/pwd.rst | 2 + Doc/library/pyexpat.rst | 2 + Doc/library/runpy.rst | 4 + Doc/library/shelve.rst | 6 - + Doc/library/site.rst | 6 - + Doc/library/socket.rst | 4 + Doc/library/stdtypes.rst | 146 ++++++++++++++++---------------- + Doc/library/sys.rst | 2 + Doc/library/traceback.rst | 2 + Doc/library/types.rst | 2 + Doc/reference/compound_stmts.rst | 90 +++++++++---------- + Doc/reference/datamodel.rst | 154 +++++++++++++++++----------------- + Doc/reference/executionmodel.rst | 2 + Doc/reference/expressions.rst | 134 ++++++++++++++--------------- + Doc/reference/simple_stmts.rst | 74 ++++++++-------- + Doc/reference/toplevel_components.rst | 10 +- + Doc/tools/extensions/pyspecific.py | 25 +++++ + Doc/tutorial/classes.rst | 2 + Doc/tutorial/controlflow.rst | 2 + Doc/tutorial/inputoutput.rst | 6 - + Doc/tutorial/modules.rst | 4 + Doc/tutorial/stdlib.rst | 2 + 71 files changed, 457 insertions(+), 427 deletions(-) + +--- a/Doc/c-api/bytearray.rst ++++ b/Doc/c-api/bytearray.rst +@@ -5,7 +5,7 @@ + Byte Array Objects + ------------------ -+# Allow translation of index directives -+gettext_additional_targets = [ -+ 'index', -+] -+ - # Options for HTML output - # ----------------------- - -Index: Python-3.10.13/Doc/tools/extensions/pyspecific.py -=================================================================== ---- Python-3.10.13.orig/Doc/tools/extensions/pyspecific.py -+++ Python-3.10.13/Doc/tools/extensions/pyspecific.py -@@ -623,6 +623,30 @@ def process_audit_events(app, doctree, f - node.replace_self(table) +-.. index:: object: bytearray ++.. 
index:: pair: object; bytearray -+def patch_pairindextypes(app) -> None: -+ if app.builder.name != 'gettext': -+ return -+ -+ # allow translating deprecated index entries -+ try: -+ from sphinx.domains.python import pairindextypes -+ except ImportError: -+ pass -+ else: -+ # Sphinx checks if a 'pair' type entry on an index directive is one of -+ # the Sphinx-translated pairindextypes values. As we intend to move -+ # away from this, we need Sphinx to believe that these values don't -+ # exist, by deleting them when using the gettext builder. -+ -+ pairindextypes.pop('module', None) -+ pairindextypes.pop('keyword', None) -+ pairindextypes.pop('operator', None) -+ pairindextypes.pop('object', None) -+ pairindextypes.pop('exception', None) -+ pairindextypes.pop('statement', None) -+ pairindextypes.pop('builtin', None) -+ -+ - def setup(app): - app.add_role('issue', issue_role) - app.add_role('gh', gh_issue_role) -@@ -645,6 +669,7 @@ def setup(app): - app.add_directive_to_domain('py', 'awaitablemethod', PyAwaitableMethod) - app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) - app.add_directive('miscnews', MiscNews) -+ app.connect('builder-inited', patch_pairindextypes) - app.connect('doctree-resolved', process_audit_events) - app.connect('env-merge-info', audit_events_merge) - app.connect('env-purge-doc', audit_events_purge) -Index: Python-3.10.13/Doc/c-api/exceptions.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/exceptions.rst -+++ Python-3.10.13/Doc/c-api/exceptions.rst + .. c:type:: PyByteArrayObject +--- a/Doc/c-api/bytes.rst ++++ b/Doc/c-api/bytes.rst +@@ -8,7 +8,7 @@ Bytes Objects + These functions raise :exc:`TypeError` when expecting a bytes parameter and + called with a non-bytes parameter. + +-.. index:: object: bytes ++.. index:: pair: object; bytes + + + .. 
c:type:: PyBytesObject +--- a/Doc/c-api/capsule.rst ++++ b/Doc/c-api/capsule.rst +@@ -5,7 +5,7 @@ + Capsules + -------- + +-.. index:: object: Capsule ++.. index:: pair: object; Capsule + + Refer to :ref:`using-capsules` for more information on using these objects. + +--- a/Doc/c-api/complex.rst ++++ b/Doc/c-api/complex.rst +@@ -5,7 +5,7 @@ + Complex Number Objects + ---------------------- + +-.. index:: object: complex number ++.. index:: pair: object; complex number + + Python's complex number objects are implemented as two distinct types when + viewed from the C API: one is the Python object exposed to Python programs, and +--- a/Doc/c-api/concrete.rst ++++ b/Doc/c-api/concrete.rst +@@ -40,7 +40,7 @@ This section describes Python type objec + Numeric Objects + =============== + +-.. index:: object: numeric ++.. index:: pair: object; numeric + + .. toctree:: + +@@ -55,7 +55,7 @@ Numeric Objects + Sequence Objects + ================ + +-.. index:: object: sequence ++.. index:: pair: object; sequence + + Generic operations on sequence objects were discussed in the previous chapter; + this section deals with the specific kinds of sequence objects that are +@@ -77,7 +77,7 @@ intrinsic to the Python language. + Container Objects + ================= + +-.. index:: object: mapping ++.. index:: pair: object; mapping + + .. toctree:: + +--- a/Doc/c-api/dict.rst ++++ b/Doc/c-api/dict.rst +@@ -5,7 +5,7 @@ + Dictionary Objects + ------------------ + +-.. index:: object: dictionary ++.. index:: pair: object; dictionary + + + .. c:type:: PyDictObject +@@ -154,7 +154,7 @@ Dictionary Objects + + .. c:function:: Py_ssize_t PyDict_Size(PyObject *p) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Return the number of items in the dictionary. This is equivalent to + ``len(p)`` on a dictionary. +--- a/Doc/c-api/exceptions.rst ++++ b/Doc/c-api/exceptions.rst @@ -503,7 +503,7 @@ Signal Handling .. 
c:function:: int PyErr_CheckSignals() @@ -88,10 +194,61 @@ Index: Python-3.10.13/Doc/c-api/exceptions.rst single: KeyboardInterrupt (built-in exception) Simulate the effect of a signal arriving. The next time -Index: Python-3.10.13/Doc/c-api/init.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/init.rst -+++ Python-3.10.13/Doc/c-api/init.rst +--- a/Doc/c-api/file.rst ++++ b/Doc/c-api/file.rst +@@ -5,7 +5,7 @@ + File Objects + ------------ + +-.. index:: object: file ++.. index:: pair: object; file + + These APIs are a minimal emulation of the Python 2 C API for built-in file + objects, which used to rely on the buffered I/O (:c:expr:`FILE*`) support +--- a/Doc/c-api/float.rst ++++ b/Doc/c-api/float.rst +@@ -5,7 +5,7 @@ + Floating Point Objects + ---------------------- + +-.. index:: object: floating point ++.. index:: pair: object; floating point + + + .. c:type:: PyFloatObject +--- a/Doc/c-api/function.rst ++++ b/Doc/c-api/function.rst +@@ -5,7 +5,7 @@ + Function Objects + ---------------- + +-.. index:: object: function ++.. index:: pair: object; function + + There are a few functions specific to Python functions. + +--- a/Doc/c-api/import.rst ++++ b/Doc/c-api/import.rst +@@ -41,7 +41,7 @@ Importing Modules + + .. c:function:: PyObject* PyImport_ImportModuleEx(const char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) + +- .. index:: builtin: __import__ ++ .. index:: pair: built-in function; __import__ + + Import a module. This is best described by referring to the built-in Python + function :func:`__import__`. +@@ -120,7 +120,7 @@ Importing Modules + + .. c:function:: PyObject* PyImport_ExecCodeModule(const char *name, PyObject *co) + +- .. index:: builtin: compile ++ .. 
index:: pair: built-in function; compile + + Given a module name (possibly of the form ``package.module``) and a code object + read from a Python bytecode file or obtained from the built-in function +--- a/Doc/c-api/init.rst ++++ b/Doc/c-api/init.rst @@ -233,9 +233,9 @@ Initializing and finalizing the interpre single: PyEval_InitThreads() single: modules (in module sys) @@ -127,10 +284,8 @@ Index: Python-3.10.13/Doc/c-api/init.rst single: stdout (in module sys) single: stderr (in module sys) single: stdin (in module sys) -Index: Python-3.10.13/Doc/c-api/intro.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/intro.rst -+++ Python-3.10.13/Doc/c-api/intro.rst +--- a/Doc/c-api/intro.rst ++++ b/Doc/c-api/intro.rst @@ -226,7 +226,7 @@ complete listing. Objects, Types and Reference Counts =================================== @@ -153,10 +308,355 @@ Index: Python-3.10.13/Doc/c-api/intro.rst triple: module; search; path single: path (in module sys) -Index: Python-3.10.13/Doc/library/_thread.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/_thread.rst -+++ Python-3.10.13/Doc/library/_thread.rst +--- a/Doc/c-api/list.rst ++++ b/Doc/c-api/list.rst +@@ -5,7 +5,7 @@ + List Objects + ------------ + +-.. index:: object: list ++.. index:: pair: object; list + + + .. c:type:: PyListObject +@@ -45,7 +45,7 @@ List Objects + + .. c:function:: Py_ssize_t PyList_Size(PyObject *list) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Return the length of the list object in *list*; this is equivalent to + ``len(list)`` on a list object. +@@ -138,7 +138,7 @@ List Objects + + .. c:function:: PyObject* PyList_AsTuple(PyObject *list) + +- .. index:: builtin: tuple ++ .. index:: pair: built-in function; tuple + + Return a new tuple object containing the contents of *list*; equivalent to + ``tuple(list)``. 
+--- a/Doc/c-api/long.rst ++++ b/Doc/c-api/long.rst +@@ -5,8 +5,8 @@ + Integer Objects + --------------- + +-.. index:: object: long integer +- object: integer ++.. index:: pair: object; long integer ++ pair: object; integer + + All integers are implemented as "long" integer objects of arbitrary size. + +--- a/Doc/c-api/mapping.rst ++++ b/Doc/c-api/mapping.rst +@@ -20,7 +20,7 @@ See also :c:func:`PyObject_GetItem`, :c: + .. c:function:: Py_ssize_t PyMapping_Size(PyObject *o) + Py_ssize_t PyMapping_Length(PyObject *o) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Returns the number of keys in object *o* on success, and ``-1`` on failure. + This is equivalent to the Python expression ``len(o)``. +--- a/Doc/c-api/memoryview.rst ++++ b/Doc/c-api/memoryview.rst +@@ -3,7 +3,7 @@ + .. _memoryview-objects: + + .. index:: +- object: memoryview ++ pair: object; memoryview + + MemoryView objects + ------------------ +--- a/Doc/c-api/method.rst ++++ b/Doc/c-api/method.rst +@@ -5,7 +5,7 @@ + Instance Method Objects + ----------------------- + +-.. index:: object: instancemethod ++.. index:: pair: object; instancemethod + + An instance method is a wrapper for a :c:data:`PyCFunction` and the new way + to bind a :c:data:`PyCFunction` to a class object. It replaces the former call +@@ -47,7 +47,7 @@ to bind a :c:data:`PyCFunction` to a cla + Method Objects + -------------- + +-.. index:: object: method ++.. index:: pair: object; method + + Methods are bound function objects. Methods are always bound to an instance of + a user-defined class. Unbound methods (methods bound to a class object) are +--- a/Doc/c-api/module.rst ++++ b/Doc/c-api/module.rst +@@ -5,7 +5,7 @@ + Module Objects + -------------- + +-.. index:: object: module ++.. index:: pair: object; module + + + .. c:var:: PyTypeObject PyModule_Type +--- a/Doc/c-api/none.rst ++++ b/Doc/c-api/none.rst +@@ -5,7 +5,7 @@ + The ``None`` Object + ------------------- + +-.. index:: object: None ++.. 
index:: pair: object; None + + Note that the :c:type:`PyTypeObject` for ``None`` is not directly exposed in the + Python/C API. Since ``None`` is a singleton, testing for object identity (using +--- a/Doc/c-api/number.rst ++++ b/Doc/c-api/number.rst +@@ -64,7 +64,7 @@ Number Protocol + + .. c:function:: PyObject* PyNumber_Divmod(PyObject *o1, PyObject *o2) + +- .. index:: builtin: divmod ++ .. index:: pair: built-in function; divmod + + See the built-in function :func:`divmod`. Returns ``NULL`` on failure. This is + the equivalent of the Python expression ``divmod(o1, o2)``. +@@ -72,7 +72,7 @@ Number Protocol + + .. c:function:: PyObject* PyNumber_Power(PyObject *o1, PyObject *o2, PyObject *o3) + +- .. index:: builtin: pow ++ .. index:: pair: built-in function; pow + + See the built-in function :func:`pow`. Returns ``NULL`` on failure. This is the + equivalent of the Python expression ``pow(o1, o2, o3)``, where *o3* is optional. +@@ -94,7 +94,7 @@ Number Protocol + + .. c:function:: PyObject* PyNumber_Absolute(PyObject *o) + +- .. index:: builtin: abs ++ .. index:: pair: built-in function; abs + + Returns the absolute value of *o*, or ``NULL`` on failure. This is the equivalent + of the Python expression ``abs(o)``. +@@ -192,7 +192,7 @@ Number Protocol + + .. c:function:: PyObject* PyNumber_InPlacePower(PyObject *o1, PyObject *o2, PyObject *o3) + +- .. index:: builtin: pow ++ .. index:: pair: built-in function; pow + + See the built-in function :func:`pow`. Returns ``NULL`` on failure. The operation + is done *in-place* when *o1* supports it. This is the equivalent of the Python +@@ -238,7 +238,7 @@ Number Protocol + + .. c:function:: PyObject* PyNumber_Long(PyObject *o) + +- .. index:: builtin: int ++ .. index:: pair: built-in function; int + + Returns the *o* converted to an integer object on success, or ``NULL`` on + failure. This is the equivalent of the Python expression ``int(o)``. +@@ -246,7 +246,7 @@ Number Protocol + + .. 
c:function:: PyObject* PyNumber_Float(PyObject *o) + +- .. index:: builtin: float ++ .. index:: pair: built-in function; float + + Returns the *o* converted to a float object on success, or ``NULL`` on failure. + This is the equivalent of the Python expression ``float(o)``. +--- a/Doc/c-api/object.rst ++++ b/Doc/c-api/object.rst +@@ -172,7 +172,7 @@ Object Protocol + + .. c:function:: PyObject* PyObject_Repr(PyObject *o) + +- .. index:: builtin: repr ++ .. index:: pair: built-in function; repr + + Compute a string representation of object *o*. Returns the string + representation on success, ``NULL`` on failure. This is the equivalent of the +@@ -184,7 +184,7 @@ Object Protocol + + .. c:function:: PyObject* PyObject_ASCII(PyObject *o) + +- .. index:: builtin: ascii ++ .. index:: pair: built-in function; ascii + + As :c:func:`PyObject_Repr`, compute a string representation of object *o*, but + escape the non-ASCII characters in the string returned by +@@ -209,7 +209,7 @@ Object Protocol + + .. c:function:: PyObject* PyObject_Bytes(PyObject *o) + +- .. index:: builtin: bytes ++ .. index:: pair: built-in function; bytes + + Compute a bytes representation of object *o*. ``NULL`` is returned on + failure and a bytes object on success. This is equivalent to the Python +@@ -260,7 +260,7 @@ Object Protocol + + .. c:function:: Py_hash_t PyObject_Hash(PyObject *o) + +- .. index:: builtin: hash ++ .. index:: pair: built-in function; hash + + Compute and return the hash value of an object *o*. On failure, return ``-1``. + This is the equivalent of the Python expression ``hash(o)``. +@@ -294,7 +294,7 @@ Object Protocol + + .. c:function:: PyObject* PyObject_Type(PyObject *o) + +- .. index:: builtin: type ++ .. index:: pair: built-in function; type + + When *o* is non-``NULL``, returns a type object corresponding to the object type + of object *o*. On failure, raises :exc:`SystemError` and returns ``NULL``. This +@@ -315,7 +315,7 @@ Object Protocol + .. 
c:function:: Py_ssize_t PyObject_Size(PyObject *o) + Py_ssize_t PyObject_Length(PyObject *o) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Return the length of object *o*. If the object *o* provides either the sequence + and mapping protocols, the sequence length is returned. On error, ``-1`` is +--- a/Doc/c-api/sequence.rst ++++ b/Doc/c-api/sequence.rst +@@ -18,7 +18,7 @@ Sequence Protocol + .. c:function:: Py_ssize_t PySequence_Size(PyObject *o) + Py_ssize_t PySequence_Length(PyObject *o) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Returns the number of objects in sequence *o* on success, and ``-1`` on + failure. This is equivalent to the Python expression ``len(o)``. +@@ -120,7 +120,7 @@ Sequence Protocol + + .. c:function:: PyObject* PySequence_Tuple(PyObject *o) + +- .. index:: builtin: tuple ++ .. index:: pair: built-in function; tuple + + Return a tuple object with the same contents as the sequence or iterable *o*, + or ``NULL`` on failure. If *o* is a tuple, a new reference will be returned, +--- a/Doc/c-api/set.rst ++++ b/Doc/c-api/set.rst +@@ -9,8 +9,8 @@ Set Objects + + + .. index:: +- object: set +- object: frozenset ++ pair: object; set ++ pair: object; frozenset + + This section details the public API for :class:`set` and :class:`frozenset` + objects. Any functionality not listed below is best accessed using either +@@ -107,7 +107,7 @@ or :class:`frozenset` or instances of th + + .. c:function:: Py_ssize_t PySet_Size(PyObject *anyset) + +- .. index:: builtin: len ++ .. index:: pair: built-in function; len + + Return the length of a :class:`set` or :class:`frozenset` object. Equivalent to + ``len(anyset)``. Raises a :exc:`PyExc_SystemError` if *anyset* is not a +--- a/Doc/c-api/structures.rst ++++ b/Doc/c-api/structures.rst +@@ -351,7 +351,7 @@ method. + + .. data:: METH_CLASS + +- .. index:: builtin: classmethod ++ .. 
index:: pair: built-in function; classmethod + + The method will be passed the type object as the first parameter rather + than an instance of the type. This is used to create *class methods*, +@@ -361,7 +361,7 @@ method. + + .. data:: METH_STATIC + +- .. index:: builtin: staticmethod ++ .. index:: pair: built-in function; staticmethod + + The method will be passed ``NULL`` as the first parameter rather than an + instance of the type. This is used to create *static methods*, similar to +--- a/Doc/c-api/tuple.rst ++++ b/Doc/c-api/tuple.rst +@@ -5,7 +5,7 @@ + Tuple Objects + ------------- + +-.. index:: object: tuple ++.. index:: pair: object; tuple + + + .. c:type:: PyTupleObject +--- a/Doc/c-api/type.rst ++++ b/Doc/c-api/type.rst +@@ -5,7 +5,7 @@ + Type Objects + ------------ + +-.. index:: object: type ++.. index:: pair: object; type + + + .. c:type:: PyTypeObject +--- a/Doc/c-api/typeobj.rst ++++ b/Doc/c-api/typeobj.rst +@@ -803,7 +803,7 @@ and :c:type:`PyType_Type` effectively ac + + .. c:member:: reprfunc PyTypeObject.tp_repr + +- .. index:: builtin: repr ++ .. index:: pair: built-in function; repr + + An optional pointer to a function that implements the built-in function + :func:`repr`. +@@ -868,7 +868,7 @@ and :c:type:`PyType_Type` effectively ac + + .. c:member:: hashfunc PyTypeObject.tp_hash + +- .. index:: builtin: hash ++ .. index:: pair: built-in function; hash + + An optional pointer to a function that implements the built-in function + :func:`hash`. +--- a/Doc/conf.py ++++ b/Doc/conf.py +@@ -61,6 +61,11 @@ smartquotes_excludes = { + # Avoid a warning with Sphinx >= 2.0 + master_doc = 'contents' + ++# Allow translation of index directives ++gettext_additional_targets = [ ++ 'index', ++] ++ + # Options for HTML output + # ----------------------- + +--- a/Doc/extending/newtypes.rst ++++ b/Doc/extending/newtypes.rst +@@ -149,7 +149,7 @@ done. This can be done using the :c:fun + + .. 
index:: + single: string; object representation +- builtin: repr ++ pair: built-in function; repr + + Object Presentation + ------------------- +--- a/Doc/library/_thread.rst ++++ b/Doc/library/_thread.rst @@ -204,7 +204,7 @@ In addition to these methods, lock objec **Caveats:** @@ -166,10 +666,23 @@ Index: Python-3.10.13/Doc/library/_thread.rst * Threads interact strangely with interrupts: the :exc:`KeyboardInterrupt` exception will be received by an arbitrary thread. (When the :mod:`signal` -Index: Python-3.10.13/Doc/library/cmath.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/cmath.rst -+++ Python-3.10.13/Doc/library/cmath.rst +--- a/Doc/library/binascii.rst ++++ b/Doc/library/binascii.rst +@@ -6,9 +6,9 @@ + representations. + + .. index:: +- module: uu +- module: base64 +- module: binhex ++ pair: module; uu ++ pair: module; base64 ++ pair: module; binhex + + -------------- + +--- a/Doc/library/cmath.rst ++++ b/Doc/library/cmath.rst @@ -301,7 +301,7 @@ Constants .. versionadded:: 3.6 @@ -179,10 +692,8 @@ Index: Python-3.10.13/Doc/library/cmath.rst Note that the selection of functions is similar, but not identical, to that in module :mod:`math`. The reason for having two modules is that some users aren't -Index: Python-3.10.13/Doc/library/copy.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/copy.rst -+++ Python-3.10.13/Doc/library/copy.rst +--- a/Doc/library/copy.rst ++++ b/Doc/library/copy.rst @@ -68,7 +68,7 @@ Shallow copies of dictionaries can be ma of lists by assigning a slice of the entire list, for example, ``copied_list = original_list[:]``. @@ -192,10 +703,8 @@ Index: Python-3.10.13/Doc/library/copy.rst Classes can use the same interfaces to control copying that they use to control pickling. 
See the description of module :mod:`pickle` for information on these -Index: Python-3.10.13/Doc/library/copyreg.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/copyreg.rst -+++ Python-3.10.13/Doc/library/copyreg.rst +--- a/Doc/library/copyreg.rst ++++ b/Doc/library/copyreg.rst @@ -7,8 +7,8 @@ **Source code:** :source:`Lib/copyreg.py` @@ -207,10 +716,19 @@ Index: Python-3.10.13/Doc/library/copyreg.rst -------------- -Index: Python-3.10.13/Doc/library/exceptions.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/exceptions.rst -+++ Python-3.10.13/Doc/library/exceptions.rst +--- a/Doc/library/dis.rst ++++ b/Doc/library/dis.rst +@@ -1207,7 +1207,7 @@ All of the following opcodes use their a + + .. opcode:: BUILD_SLICE (argc) + +- .. index:: builtin: slice ++ .. index:: pair: built-in function; slice + + Pushes a slice object on the stack. *argc* must be 2 or 3. If it is 2, + ``slice(TOS1, TOS)`` is pushed; if it is 3, ``slice(TOS2, TOS1, TOS)`` is +--- a/Doc/library/exceptions.rst ++++ b/Doc/library/exceptions.rst @@ -4,8 +4,8 @@ Built-in Exceptions =================== @@ -249,10 +767,8 @@ Index: Python-3.10.13/Doc/library/exceptions.rst This exception is raised when a system function returns a system-related error, including I/O failures such as "file not found" or "disk full" -Index: Python-3.10.13/Doc/library/fnmatch.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/fnmatch.rst -+++ Python-3.10.13/Doc/library/fnmatch.rst +--- a/Doc/library/fnmatch.rst ++++ b/Doc/library/fnmatch.rst @@ -8,7 +8,7 @@ .. index:: single: filenames; wildcard expansion @@ -271,10 +787,8 @@ Index: Python-3.10.13/Doc/library/fnmatch.rst Note that the filename separator (``'/'`` on Unix) is *not* special to this module. 
See module :mod:`glob` for pathname expansion (:mod:`glob` uses -Index: Python-3.10.13/Doc/library/functions.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/functions.rst -+++ Python-3.10.13/Doc/library/functions.rst +--- a/Doc/library/functions.rst ++++ b/Doc/library/functions.rst @@ -548,7 +548,7 @@ are always available. They are listed h Raises an :ref:`auditing event ` ``exec`` with the code object as the argument. Code compilation events may also be raised. @@ -313,10 +827,8 @@ Index: Python-3.10.13/Doc/library/functions.rst .. note:: -Index: Python-3.10.13/Doc/library/http.client.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/http.client.rst -+++ Python-3.10.13/Doc/library/http.client.rst +--- a/Doc/library/http.client.rst ++++ b/Doc/library/http.client.rst @@ -10,7 +10,7 @@ pair: HTTP; protocol single: HTTP; http.client (standard module) @@ -326,10 +838,19 @@ Index: Python-3.10.13/Doc/library/http.client.rst -------------- -Index: Python-3.10.13/Doc/library/internet.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/internet.rst -+++ Python-3.10.13/Doc/library/internet.rst +--- a/Doc/library/imp.rst ++++ b/Doc/library/imp.rst +@@ -10,7 +10,7 @@ + .. deprecated:: 3.4 + The :mod:`imp` module is deprecated in favor of :mod:`importlib`. + +-.. index:: statement: import ++.. index:: pair: statement; import + + -------------- + +--- a/Doc/library/internet.rst ++++ b/Doc/library/internet.rst @@ -9,7 +9,7 @@ Internet Protocols and Support single: Internet single: World Wide Web @@ -339,10 +860,8 @@ Index: Python-3.10.13/Doc/library/internet.rst The modules described in this chapter implement internet protocols and support for related technology. They are all implemented in Python. 
Most of these -Index: Python-3.10.13/Doc/library/locale.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/locale.rst -+++ Python-3.10.13/Doc/library/locale.rst +--- a/Doc/library/locale.rst ++++ b/Doc/library/locale.rst @@ -16,7 +16,7 @@ functionality. The POSIX locale mechanis certain cultural issues in an application, without requiring the programmer to know all the specifics of each country where the software is executed. @@ -361,10 +880,8 @@ Index: Python-3.10.13/Doc/library/locale.rst Locale category for the character type functions. Depending on the settings of this category, the functions of module :mod:`string` dealing with case change -Index: Python-3.10.13/Doc/library/marshal.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/marshal.rst -+++ Python-3.10.13/Doc/library/marshal.rst +--- a/Doc/library/marshal.rst ++++ b/Doc/library/marshal.rst @@ -15,8 +15,8 @@ undocumented on purpose; it may change b rarely does). [#]_ @@ -376,10 +893,8 @@ Index: Python-3.10.13/Doc/library/marshal.rst This is not a general "persistence" module. For general persistence and transfer of Python objects through RPC calls, see the modules :mod:`pickle` and -Index: Python-3.10.13/Doc/library/os.path.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/os.path.rst -+++ Python-3.10.13/Doc/library/os.path.rst +--- a/Doc/library/os.path.rst ++++ b/Doc/library/os.path.rst @@ -159,7 +159,7 @@ the :mod:`glob` module.) On Unix and Windows, return the argument with an initial component of ``~`` or ``~user`` replaced by that *user*'s home directory. 
@@ -389,10 +904,8 @@ Index: Python-3.10.13/Doc/library/os.path.rst On Unix, an initial ``~`` is replaced by the environment variable :envvar:`HOME` if it is set; otherwise the current user's home directory is looked up in the -Index: Python-3.10.13/Doc/library/os.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/os.rst -+++ Python-3.10.13/Doc/library/os.rst +--- a/Doc/library/os.rst ++++ b/Doc/library/os.rst @@ -1136,7 +1136,7 @@ or `the MSDN None: ++ if app.builder.name != 'gettext': ++ return ++ ++ # allow translating deprecated index entries ++ try: ++ from sphinx.domains.python import pairindextypes ++ except ImportError: ++ pass ++ else: ++ # Sphinx checks if a 'pair' type entry on an index directive is one of ++ # the Sphinx-translated pairindextypes values. As we intend to move ++ # away from this, we need Sphinx to believe that these values don't ++ # exist, by deleting them when using the gettext builder. ++ ++ pairindextypes.pop('module', None) ++ pairindextypes.pop('keyword', None) ++ pairindextypes.pop('operator', None) ++ pairindextypes.pop('object', None) ++ pairindextypes.pop('exception', None) ++ pairindextypes.pop('statement', None) ++ pairindextypes.pop('builtin', None) ++ ++ + def setup(app): + app.add_role('issue', issue_role) + app.add_role('gh', gh_issue_role) +@@ -645,6 +669,7 @@ def setup(app): + app.add_directive_to_domain('py', 'awaitablemethod', PyAwaitableMethod) + app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) + app.add_directive('miscnews', MiscNews) ++ app.connect('builder-inited', patch_pairindextypes) + app.connect('doctree-resolved', process_audit_events) + app.connect('env-merge-info', audit_events_merge) + app.connect('env-purge-doc', audit_events_purge) +--- a/Doc/tutorial/classes.rst ++++ b/Doc/tutorial/classes.rst @@ -344,7 +344,7 @@ list objects have methods called append, However, in the following discussion, we'll use the term method 
exclusively to mean methods of class instance objects, unless explicitly stated otherwise.) @@ -2781,10 +2983,8 @@ Index: Python-3.10.13/Doc/tutorial/classes.rst Valid method names of an instance object depend on its class. By definition, all attributes of a class that are function objects define corresponding -Index: Python-3.10.13/Doc/tutorial/controlflow.rst -=================================================================== ---- Python-3.10.13.orig/Doc/tutorial/controlflow.rst -+++ Python-3.10.13/Doc/tutorial/controlflow.rst +--- a/Doc/tutorial/controlflow.rst ++++ b/Doc/tutorial/controlflow.rst @@ -46,7 +46,7 @@ details see :ref:`tut-match`. ========================== @@ -2794,288 +2994,50 @@ Index: Python-3.10.13/Doc/tutorial/controlflow.rst The :keyword:`for` statement in Python differs a bit from what you may be used to in C or Pascal. Rather than always iterating over an arithmetic progression -Index: Python-3.10.13/Doc/c-api/import.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/import.rst -+++ Python-3.10.13/Doc/c-api/import.rst -@@ -41,7 +41,7 @@ Importing Modules - - .. c:function:: PyObject* PyImport_ImportModuleEx(const char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) - -- .. index:: builtin: __import__ -+ .. index:: pair: built-in function; __import__ - - Import a module. This is best described by referring to the built-in Python - function :func:`__import__`. -@@ -120,7 +120,7 @@ Importing Modules - - .. c:function:: PyObject* PyImport_ExecCodeModule(const char *name, PyObject *co) - -- .. index:: builtin: compile -+ .. 
index:: pair: built-in function; compile - - Given a module name (possibly of the form ``package.module``) and a code object - read from a Python bytecode file or obtained from the built-in function -Index: Python-3.10.13/Doc/c-api/mapping.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/mapping.rst -+++ Python-3.10.13/Doc/c-api/mapping.rst -@@ -20,7 +20,7 @@ See also :c:func:`PyObject_GetItem`, :c: - .. c:function:: Py_ssize_t PyMapping_Size(PyObject *o) - Py_ssize_t PyMapping_Length(PyObject *o) - -- .. index:: builtin: len -+ .. index:: pair: built-in function; len - - Returns the number of keys in object *o* on success, and ``-1`` on failure. - This is equivalent to the Python expression ``len(o)``. -Index: Python-3.10.13/Doc/c-api/number.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/number.rst -+++ Python-3.10.13/Doc/c-api/number.rst -@@ -64,7 +64,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_Divmod(PyObject *o1, PyObject *o2) - -- .. index:: builtin: divmod -+ .. index:: pair: built-in function; divmod - - See the built-in function :func:`divmod`. Returns ``NULL`` on failure. This is - the equivalent of the Python expression ``divmod(o1, o2)``. -@@ -72,7 +72,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_Power(PyObject *o1, PyObject *o2, PyObject *o3) - -- .. index:: builtin: pow -+ .. index:: pair: built-in function; pow - - See the built-in function :func:`pow`. Returns ``NULL`` on failure. This is the - equivalent of the Python expression ``pow(o1, o2, o3)``, where *o3* is optional. -@@ -94,7 +94,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_Absolute(PyObject *o) - -- .. index:: builtin: abs -+ .. index:: pair: built-in function; abs - - Returns the absolute value of *o*, or ``NULL`` on failure. This is the equivalent - of the Python expression ``abs(o)``. 
-@@ -192,7 +192,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_InPlacePower(PyObject *o1, PyObject *o2, PyObject *o3) - -- .. index:: builtin: pow -+ .. index:: pair: built-in function; pow - - See the built-in function :func:`pow`. Returns ``NULL`` on failure. The operation - is done *in-place* when *o1* supports it. This is the equivalent of the Python -@@ -238,7 +238,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_Long(PyObject *o) - -- .. index:: builtin: int -+ .. index:: pair: built-in function; int - - Returns the *o* converted to an integer object on success, or ``NULL`` on - failure. This is the equivalent of the Python expression ``int(o)``. -@@ -246,7 +246,7 @@ Number Protocol - - .. c:function:: PyObject* PyNumber_Float(PyObject *o) - -- .. index:: builtin: float -+ .. index:: pair: built-in function; float - - Returns the *o* converted to a float object on success, or ``NULL`` on failure. - This is the equivalent of the Python expression ``float(o)``. -Index: Python-3.10.13/Doc/c-api/object.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/object.rst -+++ Python-3.10.13/Doc/c-api/object.rst -@@ -172,7 +172,7 @@ Object Protocol - - .. c:function:: PyObject* PyObject_Repr(PyObject *o) - -- .. index:: builtin: repr -+ .. index:: pair: built-in function; repr - - Compute a string representation of object *o*. Returns the string - representation on success, ``NULL`` on failure. This is the equivalent of the -@@ -184,7 +184,7 @@ Object Protocol - - .. c:function:: PyObject* PyObject_ASCII(PyObject *o) - -- .. index:: builtin: ascii -+ .. index:: pair: built-in function; ascii - - As :c:func:`PyObject_Repr`, compute a string representation of object *o*, but - escape the non-ASCII characters in the string returned by -@@ -209,7 +209,7 @@ Object Protocol - - .. c:function:: PyObject* PyObject_Bytes(PyObject *o) - -- .. index:: builtin: bytes -+ .. 
index:: pair: built-in function; bytes - - Compute a bytes representation of object *o*. ``NULL`` is returned on - failure and a bytes object on success. This is equivalent to the Python -@@ -260,7 +260,7 @@ Object Protocol - - .. c:function:: Py_hash_t PyObject_Hash(PyObject *o) - -- .. index:: builtin: hash -+ .. index:: pair: built-in function; hash - - Compute and return the hash value of an object *o*. On failure, return ``-1``. - This is the equivalent of the Python expression ``hash(o)``. -@@ -294,7 +294,7 @@ Object Protocol - - .. c:function:: PyObject* PyObject_Type(PyObject *o) - -- .. index:: builtin: type -+ .. index:: pair: built-in function; type - - When *o* is non-``NULL``, returns a type object corresponding to the object type - of object *o*. On failure, raises :exc:`SystemError` and returns ``NULL``. This -@@ -315,7 +315,7 @@ Object Protocol - .. c:function:: Py_ssize_t PyObject_Size(PyObject *o) - Py_ssize_t PyObject_Length(PyObject *o) - -- .. index:: builtin: len -+ .. index:: pair: built-in function; len - - Return the length of object *o*. If the object *o* provides either the sequence - and mapping protocols, the sequence length is returned. On error, ``-1`` is -Index: Python-3.10.13/Doc/c-api/sequence.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/sequence.rst -+++ Python-3.10.13/Doc/c-api/sequence.rst -@@ -18,7 +18,7 @@ Sequence Protocol - .. c:function:: Py_ssize_t PySequence_Size(PyObject *o) - Py_ssize_t PySequence_Length(PyObject *o) - -- .. index:: builtin: len -+ .. index:: pair: built-in function; len - - Returns the number of objects in sequence *o* on success, and ``-1`` on - failure. This is equivalent to the Python expression ``len(o)``. -@@ -120,7 +120,7 @@ Sequence Protocol - - .. c:function:: PyObject* PySequence_Tuple(PyObject *o) - -- .. index:: builtin: tuple -+ .. 
index:: pair: built-in function; tuple - - Return a tuple object with the same contents as the sequence or iterable *o*, - or ``NULL`` on failure. If *o* is a tuple, a new reference will be returned, -Index: Python-3.10.13/Doc/c-api/structures.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/structures.rst -+++ Python-3.10.13/Doc/c-api/structures.rst -@@ -351,7 +351,7 @@ method. - - .. data:: METH_CLASS - -- .. index:: builtin: classmethod -+ .. index:: pair: built-in function; classmethod - - The method will be passed the type object as the first parameter rather - than an instance of the type. This is used to create *class methods*, -@@ -361,7 +361,7 @@ method. - - .. data:: METH_STATIC - -- .. index:: builtin: staticmethod -+ .. index:: pair: built-in function; staticmethod - - The method will be passed ``NULL`` as the first parameter rather than an - instance of the type. This is used to create *static methods*, similar to -Index: Python-3.10.13/Doc/c-api/typeobj.rst -=================================================================== ---- Python-3.10.13.orig/Doc/c-api/typeobj.rst -+++ Python-3.10.13/Doc/c-api/typeobj.rst -@@ -803,7 +803,7 @@ and :c:type:`PyType_Type` effectively ac - - .. c:member:: reprfunc PyTypeObject.tp_repr - -- .. index:: builtin: repr -+ .. index:: pair: built-in function; repr - - An optional pointer to a function that implements the built-in function - :func:`repr`. -@@ -868,7 +868,7 @@ and :c:type:`PyType_Type` effectively ac - - .. c:member:: hashfunc PyTypeObject.tp_hash - -- .. index:: builtin: hash -+ .. index:: pair: built-in function; hash - - An optional pointer to a function that implements the built-in function - :func:`hash`. -Index: Python-3.10.13/Doc/extending/newtypes.rst -=================================================================== ---- Python-3.10.13.orig/Doc/extending/newtypes.rst -+++ Python-3.10.13/Doc/extending/newtypes.rst -@@ -149,7 +149,7 @@ done. 
This can be done using the :c:fun +--- a/Doc/tutorial/inputoutput.rst ++++ b/Doc/tutorial/inputoutput.rst +@@ -285,8 +285,8 @@ Reading and Writing Files + ========================= .. index:: - single: string; object representation -- builtin: repr -+ pair: built-in function; repr +- builtin: open +- object: file ++ pair: built-in function; open ++ pair: object; file - Object Presentation - ------------------- -Index: Python-3.10.13/Doc/library/dis.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/dis.rst -+++ Python-3.10.13/Doc/library/dis.rst -@@ -1207,7 +1207,7 @@ All of the following opcodes use their a + :func:`open` returns a :term:`file object`, and is most commonly used with + two positional arguments and one keyword argument: +@@ -466,7 +466,7 @@ Reference for a complete guide to file o + Saving structured data with :mod:`json` + --------------------------------------- - .. opcode:: BUILD_SLICE (argc) +-.. index:: module: json ++.. index:: pair: module; json -- .. index:: builtin: slice -+ .. index:: pair: built-in function; slice + Strings can easily be written to and read from a file. Numbers take a bit more + effort, since the :meth:`read` method only returns strings, which will have to +--- a/Doc/tutorial/modules.rst ++++ b/Doc/tutorial/modules.rst +@@ -260,7 +260,7 @@ Some tips for experts: + Standard Modules + ================ - Pushes a slice object on the stack. *argc* must be 2 or 3. If it is 2, - ``slice(TOS1, TOS)`` is pushed; if it is 3, ``slice(TOS2, TOS1, TOS)`` is -Index: Python-3.10.13/Doc/library/pprint.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/pprint.rst -+++ Python-3.10.13/Doc/library/pprint.rst -@@ -171,7 +171,7 @@ The :mod:`pprint` module also provides s +-.. index:: module: sys ++.. index:: pair: module; sys - .. 
function:: isreadable(object) + Python comes with a library of standard modules, described in a separate + document, the Python Library Reference ("Library Reference" hereafter). Some +@@ -341,7 +341,7 @@ Without arguments, :func:`dir` lists the -- .. index:: builtin: eval -+ .. index:: pair: built-in function; eval + Note that it lists all types of names: variables, modules, functions, etc. - Determine if the formatted representation of *object* is "readable", or can be - used to reconstruct the value using :func:`eval`. This always returns ``False`` -@@ -226,7 +226,7 @@ created. +-.. index:: module: builtins ++.. index:: pair: module; builtins - .. method:: PrettyPrinter.isreadable(object) - -- .. index:: builtin: eval -+ .. index:: pair: built-in function; eval - - Determine if the formatted representation of the object is "readable," or can be - used to reconstruct the value using :func:`eval`. Note that this returns -Index: Python-3.10.13/Doc/library/types.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/types.rst -+++ Python-3.10.13/Doc/library/types.rst -@@ -146,7 +146,7 @@ Standard names are defined for the follo - - .. class:: CodeType(**kwargs) - -- .. index:: builtin: compile -+ .. index:: pair: built-in function; compile - - The type for code objects such as returned by :func:`compile`. - -Index: Python-3.10.13/Doc/tutorial/stdlib.rst -=================================================================== ---- Python-3.10.13.orig/Doc/tutorial/stdlib.rst -+++ Python-3.10.13/Doc/tutorial/stdlib.rst + :func:`dir` does not list the names of built-in functions and variables. If you + want a list of those, they are defined in the standard module +--- a/Doc/tutorial/stdlib.rst ++++ b/Doc/tutorial/stdlib.rst @@ -24,7 +24,7 @@ Be sure to use the ``import os`` style i will keep :func:`os.open` from shadowing the built-in :func:`open` function which operates much differently. 
@@ -3085,33 +3047,3 @@ Index: Python-3.10.13/Doc/tutorial/stdlib.rst The built-in :func:`dir` and :func:`help` functions are useful as interactive aids for working with large modules like :mod:`os`:: -Index: Python-3.10.13/Doc/library/imp.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/imp.rst -+++ Python-3.10.13/Doc/library/imp.rst -@@ -10,7 +10,7 @@ - .. deprecated:: 3.4 - The :mod:`imp` module is deprecated in favor of :mod:`importlib`. - --.. index:: statement: import -+.. index:: pair: statement; import - - -------------- - -Index: Python-3.10.13/Doc/library/binascii.rst -=================================================================== ---- Python-3.10.13.orig/Doc/library/binascii.rst -+++ Python-3.10.13/Doc/library/binascii.rst -@@ -6,9 +6,9 @@ - representations. - - .. index:: -- module: uu -- module: base64 -- module: binhex -+ pair: module; uu -+ pair: module; base64 -+ pair: module; binhex - - -------------- - diff --git a/libexpat260.patch b/libexpat260.patch new file mode 100644 index 0000000..bd4b4b3 --- /dev/null +++ b/libexpat260.patch @@ -0,0 +1,107 @@ +From f2eebf3c38eae77765247791576b437ec25ccfe2 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Sun, 11 Feb 2024 12:08:39 +0200 +Subject: [PATCH] gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 + (GH-115164) + +Feeding the parser by too small chunks defers parsing to prevent +CVE-2023-52425. Future versions of Expat may be more reactive. 
+(cherry picked from commit 4a08e7b3431cd32a0daf22a33421cd3035343dc4) + +Co-authored-by: Serhiy Storchaka +--- + Lib/test/test_xml_etree.py | 58 ++++++++++++------- + ...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 + + 2 files changed, 38 insertions(+), 22 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst + +Index: Python-3.10.13/Lib/test/test_xml_etree.py +=================================================================== +--- Python-3.10.13.orig/Lib/test/test_xml_etree.py ++++ Python-3.10.13/Lib/test/test_xml_etree.py +@@ -13,6 +13,7 @@ import itertools + import operator + import os + import pickle ++import pyexpat + import sys + import textwrap + import types +@@ -120,6 +121,10 @@ ATTLIST_XML = """\ + + """ + ++fails_with_expat_2_6_0 = (unittest.expectedFailure ++ if pyexpat.version_info >= (2, 6, 0) else ++ lambda test: test) ++ + def checkwarnings(*filters, quiet=False): + def decorator(test): + def newtest(*args, **kwargs): +@@ -1396,28 +1401,37 @@ class XMLPullParserTest(unittest.TestCas + self.assertEqual([(action, elem.tag) for action, elem in events], + expected) + +- def test_simple_xml(self): +- for chunk_size in (None, 1, 5): +- with self.subTest(chunk_size=chunk_size): +- parser = ET.XMLPullParser() +- self.assert_event_tags(parser, []) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, []) +- self._feed(parser, +- "\n text\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'element')]) +- self._feed(parser, "texttail\n", chunk_size) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [ +- ('end', 'element'), +- ('end', 'empty-element'), +- ]) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'root')]) +- self.assertIsNone(parser.close()) ++ def test_simple_xml(self, chunk_size=None): ++ parser = ET.XMLPullParser() ++ self.assert_event_tags(parser, []) ++ self._feed(parser, "\n", chunk_size) ++ 
self.assert_event_tags(parser, []) ++ self._feed(parser, ++ "\n text\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'element')]) ++ self._feed(parser, "texttail\n", chunk_size) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [ ++ ('end', 'element'), ++ ('end', 'empty-element'), ++ ]) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'root')]) ++ self.assertIsNone(parser.close()) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_1(self): ++ self.test_simple_xml(chunk_size=1) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_5(self): ++ self.test_simple_xml(chunk_size=5) ++ ++ def test_simple_xml_chunk_22(self): ++ self.test_simple_xml(chunk_size=22) + + def test_feed_while_iterating(self): + parser = ET.XMLPullParser() +Index: Python-3.10.13/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +=================================================================== +--- /dev/null ++++ Python-3.10.13/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +@@ -0,0 +1,2 @@ ++Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat ++2.6.0. diff --git a/python310.changes b/python310.changes index b15c530..1807d2a 100644 --- a/python310.changes +++ b/python310.changes @@ -1,3 +1,17 @@ +------------------------------------------------------------------- +Thu Feb 15 10:29:07 UTC 2024 - Daniel Garcia + +- Add upstream patch libexpat260.patch, Fix tests for XMLPullParser + with Expat 2.6.0, gh#python/cpython#115289 + +------------------------------------------------------------------- +Mon Dec 18 16:20:58 UTC 2023 - Matej Cepl + +- Refresh CVE-2023-27043-email-parsing-errors.patch to + gh#python/cpython!111116, fixing bsc#1210638 (CVE-2023-27043). +- Thus we can remove Revert-gh105127-left-tests.patch, which is + now useless. 
+ ------------------------------------------------------------------- Mon Sep 4 13:18:29 UTC 2023 - Daniel Garcia diff --git a/python310.spec b/python310.spec index 720a3fb..b214c0f 100644 --- a/python310.spec +++ b/python310.spec @@ -1,7 +1,7 @@ # -# spec file +# spec file for package python310 # -# Copyright (c) 2023 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -177,9 +177,6 @@ Patch39: gh-78214-marshal_stabilize_FLAG_REF.patch # indicate the parsing error (old API), from gh#python/cpython!105127 # Patch carries a REGRESSION (gh#python/cpython#106669), so it has been also partially REVERTED Patch40: CVE-2023-27043-email-parsing-errors.patch -# PATCH-FIX-UPSTREAM Revert-gh105127-left-tests.patch bsc#1210638 mcepl@suse.com -# Partially revert previous patch -Patch41: Revert-gh105127-left-tests.patch # PATCH-FIX-UPSTREAM fix-sphinx-72.patch gh#python/cpython#97950 # This is a patch with a lot of PR combined to make the doc work with # sphinx 7.2 @@ -194,6 +191,9 @@ Patch41: Revert-gh105127-left-tests.patch # * gh#python/cpython#104221 # * gh#python/cpython#107246 Patch42: fix-sphinx-72.patch +# PATCH-FIX-UPSTREAM libexpat260.patch gh#python/cpython#115289 +# Fix tests for XMLPullParser with Expat 2.6.0 +Patch43: libexpat260.patch BuildRequires: autoconf-archive BuildRequires: automake BuildRequires: fdupes @@ -452,24 +452,24 @@ other applications. 
%prep %setup -q -n %{tarname} -%patch02 -p1 -%patch06 -p1 -%patch07 -p1 -%patch08 -p1 -%patch09 -p1 -%patch15 -p1 -%patch29 -p1 +%patch -P 02 -p1 +%patch -P 06 -p1 +%patch -P 07 -p1 +%patch -P 08 -p1 +%patch -P 09 -p1 +%patch -P 15 -p1 +%patch -P 29 -p1 %if 0%{?sle_version} && 0%{?sle_version} <= 150300 -%patch33 -p1 -%patch34 -p1 +%patch -P 33 -p1 +%patch -P 34 -p1 %endif -%patch35 -p1 -%patch36 -p1 -%patch38 -p1 -%patch39 -p1 -%patch40 -p1 -%patch41 -p1 -%patch42 -p1 +%patch -P 35 -p1 +%patch -P 36 -p1 +%patch -P 38 -p1 +%patch -P 39 -p1 +%patch -P 40 -p1 +%patch -P 42 -p1 +%patch -P 43 -p1 # drop Autoconf version requirement sed -i 's/^AC_PREREQ/dnl AC_PREREQ/' configure.ac