From 6441e5a86b5f96c692dff6359a6ff1f544e73d7f7ef2cde1fc64ab74d42fb017 Mon Sep 17 00:00:00 2001 From: Matej Cepl Date: Thu, 22 May 2025 12:39:01 +0000 Subject: [PATCH] Use patch from Fedora project OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python312?expand=0&rev=135 --- CVE-2025-4516-DecodeError-handler.patch | 292 ++++++++++-------------- 1 file changed, 121 insertions(+), 171 deletions(-) diff --git a/CVE-2025-4516-DecodeError-handler.patch b/CVE-2025-4516-DecodeError-handler.patch index e03c298..fc761c2 100644 --- a/CVE-2025-4516-DecodeError-handler.patch +++ b/CVE-2025-4516-DecodeError-handler.patch @@ -1,7 +1,8 @@ -From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001 +From a75953b347716fff694aa59a7c7c2489fa50d1f5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka -Date: Tue, 4 Feb 2025 11:44:39 +0200 -Subject: Fix use-after-free in the unicode-escape decoder with error handler +Date: Tue, 20 May 2025 15:46:57 +0300 +Subject: [PATCH] [3.12] gh-133767: Fix use-after-free in the unicode-escape + decoder with an error handler (GH-129648) (GH-133944) If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then @@ -12,47 +13,45 @@ the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). ---- - Include/internal/pycore_bytesobject.h | 5 - Include/internal/pycore_unicodeobject.h | 16 +++ - Lib/test/test_codeccallbacks.py | 39 +++++++ - Lib/test/test_codecs.py | 52 ++++++++-- - Lib/test/test_codeop.py | 4 - Lib/test/test_string_literals.py | 8 - - Lib/test/test_unparse.py | 2 - Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 - Objects/bytesobject.c | 43 ++++---- - Objects/unicodeobject.c | 46 +++++--- - Parser/string_parser.c | 32 +++--- - 11 files changed, 182 insertions(+), 67 deletions(-) +(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e) +(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d) -Index: Python-3.12.10/Include/internal/pycore_bytesobject.h +Co-authored-by: Serhiy Storchaka +--- + Include/cpython/bytesobject.h | 4 + Include/cpython/unicodeobject.h | 13 ++ + Lib/test/test_codeccallbacks.py | 39 ++++++ + Lib/test/test_codecs.py | 52 ++++++-- + Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 + Objects/bytesobject.c | 54 +++++--- + Objects/unicodeobject.c | 61 +++++++--- + Parser/string_parser.c | 26 ++-- + 8 files changed, 194 insertions(+), 57 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst + +Index: Python-3.12.10/Include/cpython/bytesobject.h =================================================================== ---- Python-3.12.10.orig/Include/internal/pycore_bytesobject.h 2025-04-08 11:35:47.000000000 +0000 -+++ Python-3.12.10/Include/internal/pycore_bytesobject.h 2025-05-17 12:02:30.561879645 +0000 -@@ -8,6 +8,11 @@ - # error "this header requires Py_BUILD_CORE define" - #endif +--- Python-3.12.10.orig/Include/cpython/bytesobject.h 2025-04-08 11:35:47.000000000 +0000 ++++ Python-3.12.10/Include/cpython/bytesobject.h 2025-05-22 12:38:07.205729906 +0000 +@@ -25,6 +25,10 @@ + int use_bytearray); -+// Helper for PyBytes_DecodeEscape that detects invalid escape chars. -+// Export for test_peg_generator. + /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */ +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, + const char *, + int *, const char **); ++// Export for binary compatibility. + PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t, + const char *, const char **); - /* Substring Search. - -Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h +Index: Python-3.12.10/Include/cpython/unicodeobject.h =================================================================== ---- Python-3.12.10.orig/Include/internal/pycore_unicodeobject.h 2025-04-08 11:35:47.000000000 +0000 -+++ Python-3.12.10/Include/internal/pycore_unicodeobject.h 2025-05-17 12:05:20.771043709 +0000 -@@ -79,6 +79,22 @@ - // Like PyUnicode_AsUTF8(), but check for embedded null characters. - extern const char* _PyUnicode_AsUTF8NoNUL(PyObject *); - -+// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape -+// chars. -+// Export for test_peg_generator. +--- Python-3.12.10.orig/Include/cpython/unicodeobject.h 2025-04-08 11:35:47.000000000 +0000 ++++ Python-3.12.10/Include/cpython/unicodeobject.h 2025-05-22 12:38:07.205905378 +0000 +@@ -684,6 +684,19 @@ + ); + /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. */ +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ @@ -65,14 +64,14 @@ Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h + point to the first invalid escaped + char in string. + May be NULL if errors is not NULL. */ -+ - - #ifdef __cplusplus - } ++// Export for binary compatibility. + PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py =================================================================== ---- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-17 12:00:00.337816215 +0000 -+++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-18 22:41:55.726670925 +0000 +--- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-22 12:37:58.935377659 +0000 ++++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-22 12:38:07.206131787 +0000 @@ -1,6 +1,7 @@ import codecs import html.entities @@ -115,28 +114,28 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py + self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input))) + self.assertIn(msg, str(cm.warning)) + -+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence") -+ check(br'\x0n\501', '\u0404\n\u0141', r'invalid octal escape sequence') -+ check(br'\x0z', '\u0404\\z', r'invalid escape sequence') ++ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'") ++ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'") + -+ check(br'\x3n\zr', '\u0404\n\\zr', r'invalid escape sequence') -+ check(br'\x3zr', '\u0404\\zr', r'invalid escape sequence') -+ check(br'\x3z5', '\u0404\\z5', r'invalid escape sequence') -+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'invalid escape sequence') -+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'invalid escape sequence') ++ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'") ++ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'") + -+ check(br'\x5n\z', '\u0404\n\\z', r'invalid escape sequence') -+ check(br'\x5n\501', '\u0404\n\u0141', r'invalid octal escape sequence') -+ check(br'\x5z', '\u0404\\z', r'invalid escape sequence') -+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'invalid escape sequence') ++ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'") ++ check(br'\x5n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'") ++ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'") ++ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'") + # issue32583 def test_crashing_decode_handler(self): # better generating one more character to fill the extra space slot Index: Python-3.12.10/Lib/test/test_codecs.py =================================================================== ---- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-17 12:00:00.357214034 +0000 -+++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-19 06:18:25.842127040 +0000 +--- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-22 12:37:58.952566393 +0000 ++++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-22 12:38:07.206633606 +0000 @@ -1196,23 +1196,39 @@ check(br"[\1010]", b"[A0]") check(br"[\x41]", b"[A]") @@ -150,33 +149,33 @@ Index: Python-3.12.10/Lib/test/test_codecs.py if b not in b'abfnrtvx': - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r"'\\%c' is an invalid escape sequence" % i): ++ r"invalid escape sequence '\\%c'" % i): check(b"\\" + b, b"\\" + b) - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r"invalid escape sequence"): ++ r"invalid escape sequence '\\%c'" % (i-32)): check(b"\\" + b.upper(), b"\\" + b.upper()) - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r"'\\8' is an invalid escape sequence"): ++ r"invalid escape sequence '\\8'"): check(br"\8", b"\\8") with self.assertWarns(DeprecationWarning): check(br"\9", b"\\9") - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence') as cm: ++ r"invalid escape sequence '\\\xfa'") as cm: check(b"\\\xfa", b"\\\xfa") for i in range(0o400, 0o1000): - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid octal escape sequence'): ++ r"invalid octal escape sequence '\\%o'" % i): check(rb'\%o' % i, bytes([i & 0o377])) + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence'): ++ r"invalid escape sequence '\\z'"): + self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4)) + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid octal escape sequence'): ++ r"invalid octal escape sequence '\\501'"): + self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6)) + def test_errors(self): @@ -195,118 +194,50 @@ Index: Python-3.12.10/Lib/test/test_codecs.py if b not in b'abfnrtuvx': - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence'): ++ r"invalid escape sequence '\\%c'" % i): check(b"\\" + b, "\\" + chr(i)) if b.upper() not in b'UN': - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ 'invalid escape sequence'): ++ r"invalid escape sequence '\\%c'" % (i-32)): check(b"\\" + b.upper(), "\\" + chr(i-32)) - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence'): ++ r"invalid escape sequence '\\8'"): check(br"\8", "\\8") with self.assertWarns(DeprecationWarning): check(br"\9", "\\9") - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence') as cm: ++ r"invalid escape sequence '\\\xfa'") as cm: check(b"\\\xfa", "\\\xfa") for i in range(0o400, 0o1000): - with self.assertWarns(DeprecationWarning): + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid octal escape sequence'): ++ r"invalid octal escape sequence '\\%o'" % i): check(rb'\%o' % i, chr(i)) + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid escape sequence'): ++ r"invalid escape sequence '\\z'"): + self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4)) + with self.assertWarnsRegex(DeprecationWarning, -+ r'invalid octal escape sequence'): ++ r"invalid octal escape sequence '\\501'"): + self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6)) + def test_decode_errors(self): decode = codecs.unicode_escape_decode for c, d in (b'x', 2), (b'u', 4), (b'U', 4): -Index: Python-3.12.10/Lib/test/test_codeop.py -=================================================================== ---- Python-3.12.10.orig/Lib/test/test_codeop.py 2025-05-17 12:00:00.359727062 +0000 -+++ Python-3.12.10/Lib/test/test_codeop.py 2025-05-19 14:39:26.700464317 +0000 -@@ -281,8 +281,8 @@ - def test_warning(self): - # Test that the warning is only returned once. - with warnings_helper.check_warnings( -- ('"is" with \'str\' literal', SyntaxWarning), -- ("invalid escape sequence", SyntaxWarning), -+ (r'"is" with.*literal', SyntaxWarning), -+ (r'invalid escape sequence', SyntaxWarning), - ) as w: - compile_command(r"'\e' is 0") - self.assertEqual(len(w.warnings), 2) -Index: Python-3.12.10/Lib/test/test_string_literals.py -=================================================================== ---- Python-3.12.10.orig/Lib/test/test_string_literals.py 2025-05-17 12:00:01.489624803 +0000 -+++ Python-3.12.10/Lib/test/test_string_literals.py 2025-05-18 22:43:10.029314378 +0000 -@@ -116,7 +116,7 @@ - warnings.simplefilter('always', category=SyntaxWarning) - eval("'''\n\\z'''") - self.assertEqual(len(w), 1) -- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") -+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ") - self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 2) - -@@ -153,7 +153,7 @@ - eval("'''\n\\407'''") - self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), -- r"invalid octal escape sequence '\407'") -+ r"'\407' is an invalid octal escape sequence. ") - self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 2) - -@@ -228,7 +228,7 @@ - warnings.simplefilter('always', category=SyntaxWarning) - eval("b'''\n\\z'''") - self.assertEqual(len(w), 1) -- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") -+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ") - self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 2) - -@@ -252,7 +252,7 @@ - eval("b'''\n\\407'''") - self.assertEqual(len(w), 1) - self.assertEqual(str(w[0].message), -- r"invalid octal escape sequence '\407'") -+ r"'\407' is an invalid octal escape sequence. ") - self.assertEqual(w[0].filename, '') - self.assertEqual(w[0].lineno, 2) - -Index: Python-3.12.10/Lib/test/test_unparse.py -=================================================================== ---- Python-3.12.10.orig/Lib/test/test_unparse.py 2025-05-17 12:00:01.749488032 +0000 -+++ Python-3.12.10/Lib/test/test_unparse.py 2025-05-18 22:24:53.125415504 +0000 -@@ -653,7 +653,7 @@ - - def test_backslash_in_format_spec(self): - import re -- msg = re.escape("invalid escape sequence '\\ '") -+ msg = re.escape("invalid escape sequence") - with self.assertWarnsRegex(SyntaxWarning, msg): - self.check_ast_roundtrip("""f"{x:\\ }" """) - self.check_ast_roundtrip("""f"{x:\\n}" """) Index: Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-17 12:00:10.381159043 +0000 ++++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-22 12:38:07.207057599 +0000 @@ -0,0 +1,2 @@ +Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error +handler. Index: Python-3.12.10/Objects/bytesobject.c =================================================================== --- Python-3.12.10.orig/Objects/bytesobject.c 2025-04-08 11:35:47.000000000 +0000 -+++ Python-3.12.10/Objects/bytesobject.c 2025-05-17 21:07:50.280395109 +0000 ++++ Python-3.12.10/Objects/bytesobject.c 2025-05-22 12:38:07.207534041 +0000 @@ -1048,10 +1048,11 @@ } @@ -359,7 +290,26 @@ Index: Python-3.12.10/Objects/bytesobject.c } *p++ = '\\'; s--; -@@ -1168,17 +1172,18 @@ +@@ -1162,23 +1166,37 @@ + return NULL; + } + ++// Export for binary compatibility. ++PyObject *_PyBytes_DecodeEscape(const char *s, ++ Py_ssize_t len, ++ const char *errors, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyBytes_DecodeEscape2( ++ s, len, errors, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject *PyBytes_DecodeEscape(const char *s, + Py_ssize_t len, + const char *errors, Py_ssize_t Py_UNUSED(unicode), const char *Py_UNUSED(recode_encoding)) { @@ -381,18 +331,16 @@ Index: Python-3.12.10/Objects/bytesobject.c if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "invalid octal escape sequence '\\%.3s'", - first_invalid_escape) < 0) -+ "'\\%o' is an invalid octal escape sequence. ", ++ "invalid octal escape sequence '\\%o'", + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; -@@ -1186,8 +1191,8 @@ - } +@@ -1187,7 +1205,7 @@ else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, -- "invalid escape sequence '\\%c'", + "invalid escape sequence '\\%c'", - c) < 0) -+ "'\\%c' is an invalid escape sequence. ", + first_invalid_escape_char) < 0) { Py_DECREF(result); @@ -400,7 +348,7 @@ Index: Python-3.12.10/Objects/bytesobject.c Index: Python-3.12.10/Objects/unicodeobject.c =================================================================== --- Python-3.12.10.orig/Objects/unicodeobject.c 2025-04-08 11:35:47.000000000 +0000 -+++ Python-3.12.10/Objects/unicodeobject.c 2025-05-17 12:12:06.666344175 +0000 ++++ Python-3.12.10/Objects/unicodeobject.c 2025-05-22 12:38:07.209060814 +0000 @@ -6046,13 +6046,15 @@ /* --- Unicode Escape Codec ----------------------------------------------- */ @@ -461,7 +409,28 @@ Index: Python-3.12.10/Objects/unicodeobject.c } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); -@@ -6299,18 +6308,19 @@ +@@ -6293,24 +6302,40 @@ + return NULL; + } + ++// Export for binary compatibility. ++PyObject * ++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, ++ Py_ssize_t size, ++ const char *errors, ++ Py_ssize_t *consumed, ++ const char **first_invalid_escape) ++{ ++ int first_invalid_escape_char; ++ return _PyUnicode_DecodeUnicodeEscapeInternal2( ++ s, size, errors, consumed, ++ &first_invalid_escape_char, ++ first_invalid_escape); ++} ++ + PyObject * + _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, + Py_ssize_t size, const char *errors, Py_ssize_t *consumed) { @@ -489,7 +458,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c { Py_DECREF(result); return NULL; -@@ -6319,7 +6329,7 @@ +@@ -6319,7 +6344,7 @@ else { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", @@ -501,27 +470,8 @@ Index: Python-3.12.10/Objects/unicodeobject.c Index: Python-3.12.10/Parser/string_parser.c =================================================================== --- Python-3.12.10.orig/Parser/string_parser.c 2025-04-08 11:35:47.000000000 +0000 -+++ Python-3.12.10/Parser/string_parser.c 2025-05-17 21:41:25.941179624 +0000 -@@ -1,4 +1,6 @@ - #include -+#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape() -+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal() - - #include "tokenizer.h" - #include "pegen.h" -@@ -25,9 +27,9 @@ - int octal = ('4' <= c && c <= '7'); - PyObject *msg = - octal -- ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'", -+ ? PyUnicode_FromFormat("'\\%.3s' is an invalid octal escape sequence. ", - first_invalid_escape) -- : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c); -+ : PyUnicode_FromFormat("'\\%c' is an invalid escape sequence. ", c); - if (msg == NULL) { - return -1; - } -@@ -181,15 +183,18 @@ ++++ Python-3.12.10/Parser/string_parser.c 2025-05-22 12:38:07.209950694 +0000 +@@ -181,15 +181,18 @@ len = p - buf; s = buf; @@ -546,7 +496,7 @@ Index: Python-3.12.10/Parser/string_parser.c Py_XDECREF(u); Py_DECREF(v); return NULL; -@@ -202,14 +207,17 @@ +@@ -202,14 +205,17 @@ static PyObject * decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) {