2025-05-22 12:39:01 +00:00
committed by Git OBS Bridge
parent af89117d93
commit 6441e5a86b

View File

@@ -1,7 +1,8 @@
From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001 From a75953b347716fff694aa59a7c7c2489fa50d1f5 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com> From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 4 Feb 2025 11:44:39 +0200 Date: Tue, 20 May 2025 15:46:57 +0300
Subject: Fix use-after-free in the unicode-escape decoder with error handler Subject: [PATCH] [3.12] gh-133767: Fix use-after-free in the unicode-escape
decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then the object attribute of UnicodeDecodeError, and that bytes object then
@@ -12,47 +13,45 @@ the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such issue, because it does not _PyBytes_DecodeEscape() does not have such issue, because it does not
use the error handlers registry, but it should be changed for compatibility use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal(). with _PyUnicode_DecodeUnicodeEscapeInternal().
--- (cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
Include/internal/pycore_bytesobject.h | 5 (cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
Include/internal/pycore_unicodeobject.h | 16 +++
Lib/test/test_codeccallbacks.py | 39 +++++++
Lib/test/test_codecs.py | 52 ++++++++--
Lib/test/test_codeop.py | 4
Lib/test/test_string_literals.py | 8 -
Lib/test/test_unparse.py | 2
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 43 ++++----
Objects/unicodeobject.c | 46 +++++---
Parser/string_parser.c | 32 +++---
11 files changed, 182 insertions(+), 67 deletions(-)
Index: Python-3.12.10/Include/internal/pycore_bytesobject.h Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Include/cpython/bytesobject.h | 4
Include/cpython/unicodeobject.h | 13 ++
Lib/test/test_codeccallbacks.py | 39 ++++++
Lib/test/test_codecs.py | 52 ++++++--
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 54 +++++---
Objects/unicodeobject.c | 61 +++++++---
Parser/string_parser.c | 26 ++--
8 files changed, 194 insertions(+), 57 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
Index: Python-3.12.10/Include/cpython/bytesobject.h
=================================================================== ===================================================================
--- Python-3.12.10.orig/Include/internal/pycore_bytesobject.h 2025-04-08 11:35:47.000000000 +0000 --- Python-3.12.10.orig/Include/cpython/bytesobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/internal/pycore_bytesobject.h 2025-05-17 12:02:30.561879645 +0000 +++ Python-3.12.10/Include/cpython/bytesobject.h 2025-05-22 12:38:07.205729906 +0000
@@ -8,6 +8,11 @@ @@ -25,6 +25,10 @@
# error "this header requires Py_BUILD_CORE define" int use_bytearray);
#endif
+// Helper for PyBytes_DecodeEscape that detects invalid escape chars. /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
+// Export for test_peg_generator.
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
+ const char *, + const char *,
+ int *, const char **); + int *, const char **);
+// Export for binary compatibility.
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, const char **);
/* Substring Search. Index: Python-3.12.10/Include/cpython/unicodeobject.h
Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h
=================================================================== ===================================================================
--- Python-3.12.10.orig/Include/internal/pycore_unicodeobject.h 2025-04-08 11:35:47.000000000 +0000 --- Python-3.12.10.orig/Include/cpython/unicodeobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/internal/pycore_unicodeobject.h 2025-05-17 12:05:20.771043709 +0000 +++ Python-3.12.10/Include/cpython/unicodeobject.h 2025-05-22 12:38:07.205905378 +0000
@@ -79,6 +79,22 @@ @@ -684,6 +684,19 @@
// Like PyUnicode_AsUTF8(), but check for embedded null characters. );
extern const char* _PyUnicode_AsUTF8NoNUL(PyObject *); /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars. */
+// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+// chars.
+// Export for test_peg_generator.
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
+ const char *string, /* Unicode-Escape encoded string */ + const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */ + Py_ssize_t length, /* size of string */
@@ -65,14 +64,14 @@ Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h
+ point to the first invalid escaped + point to the first invalid escaped
+ char in string. + char in string.
+ May be NULL if errors is not NULL. */ + May be NULL if errors is not NULL. */
+ +// Export for binary compatibility.
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
#ifdef __cplusplus const char *string, /* Unicode-Escape encoded string */
} Py_ssize_t length, /* size of string */
Index: Python-3.12.10/Lib/test/test_codeccallbacks.py Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
=================================================================== ===================================================================
--- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-17 12:00:00.337816215 +0000 --- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-22 12:37:58.935377659 +0000
+++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-18 22:41:55.726670925 +0000 +++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-22 12:38:07.206131787 +0000
@@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
import codecs import codecs
import html.entities import html.entities
@@ -115,28 +114,28 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input))) + self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
+ self.assertIn(msg, str(cm.warning)) + self.assertIn(msg, str(cm.warning))
+ +
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence") + check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x0n\501', '\u0404\n\u0141', r'invalid octal escape sequence') + check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
+ check(br'\x0z', '\u0404\\z', r'invalid escape sequence') + check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
+ +
+ check(br'\x3n\zr', '\u0404\n\\zr', r'invalid escape sequence') + check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3zr', '\u0404\\zr', r'invalid escape sequence') + check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3z5', '\u0404\\z5', r'invalid escape sequence') + check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'invalid escape sequence') + check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'invalid escape sequence') + check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
+ +
+ check(br'\x5n\z', '\u0404\n\\z', r'invalid escape sequence') + check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x5n\501', '\u0404\n\u0141', r'invalid octal escape sequence') + check(br'\x5n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
+ check(br'\x5z', '\u0404\\z', r'invalid escape sequence') + check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'invalid escape sequence') + check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
+ +
# issue32583 # issue32583
def test_crashing_decode_handler(self): def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot # better generating one more character to fill the extra space slot
Index: Python-3.12.10/Lib/test/test_codecs.py Index: Python-3.12.10/Lib/test/test_codecs.py
=================================================================== ===================================================================
--- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-17 12:00:00.357214034 +0000 --- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-22 12:37:58.952566393 +0000
+++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-19 06:18:25.842127040 +0000 +++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-22 12:38:07.206633606 +0000
@@ -1196,23 +1196,39 @@ @@ -1196,23 +1196,39 @@
check(br"[\1010]", b"[A0]") check(br"[\1010]", b"[A0]")
check(br"[\x41]", b"[A]") check(br"[\x41]", b"[A]")
@@ -150,33 +149,33 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
if b not in b'abfnrtvx': if b not in b'abfnrtvx':
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r"'\\%c' is an invalid escape sequence" % i): + r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, b"\\" + b) check(b"\\" + b, b"\\" + b)
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence"): + r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), b"\\" + b.upper()) check(b"\\" + b.upper(), b"\\" + b.upper())
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r"'\\8' is an invalid escape sequence"): + r"invalid escape sequence '\\8'"):
check(br"\8", b"\\8") check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9") check(br"\9", b"\\9")
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence') as cm: + r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", b"\\\xfa") check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000): for i in range(0o400, 0o1000):
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'): + r"invalid octal escape sequence '\\%o'" % i):
check(rb'\%o' % i, bytes([i & 0o377])) check(rb'\%o' % i, bytes([i & 0o377]))
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'): + r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4)) + self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'): + r"invalid octal escape sequence '\\501'"):
+ self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6)) + self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
+ +
def test_errors(self): def test_errors(self):
@@ -195,118 +194,50 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
if b not in b'abfnrtuvx': if b not in b'abfnrtuvx':
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'): + r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, "\\" + chr(i)) check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN': if b.upper() not in b'UN':
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ 'invalid escape sequence'): + r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), "\\" + chr(i-32)) check(b"\\" + b.upper(), "\\" + chr(i-32))
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'): + r"invalid escape sequence '\\8'"):
check(br"\8", "\\8") check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9") check(br"\9", "\\9")
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence') as cm: + r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", "\\\xfa") check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000): for i in range(0o400, 0o1000):
- with self.assertWarns(DeprecationWarning): - with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'): + r"invalid octal escape sequence '\\%o'" % i):
check(rb'\%o' % i, chr(i)) check(rb'\%o' % i, chr(i))
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'): + r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4)) + self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
+ with self.assertWarnsRegex(DeprecationWarning, + with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'): + r"invalid octal escape sequence '\\501'"):
+ self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6)) + self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
+ +
def test_decode_errors(self): def test_decode_errors(self):
decode = codecs.unicode_escape_decode decode = codecs.unicode_escape_decode
for c, d in (b'x', 2), (b'u', 4), (b'U', 4): for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
Index: Python-3.12.10/Lib/test/test_codeop.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codeop.py 2025-05-17 12:00:00.359727062 +0000
+++ Python-3.12.10/Lib/test/test_codeop.py 2025-05-19 14:39:26.700464317 +0000
@@ -281,8 +281,8 @@
def test_warning(self):
# Test that the warning is only returned once.
with warnings_helper.check_warnings(
- ('"is" with \'str\' literal', SyntaxWarning),
- ("invalid escape sequence", SyntaxWarning),
+ (r'"is" with.*literal', SyntaxWarning),
+ (r'invalid escape sequence', SyntaxWarning),
) as w:
compile_command(r"'\e' is 0")
self.assertEqual(len(w.warnings), 2)
Index: Python-3.12.10/Lib/test/test_string_literals.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_string_literals.py 2025-05-17 12:00:01.489624803 +0000
+++ Python-3.12.10/Lib/test/test_string_literals.py 2025-05-18 22:43:10.029314378 +0000
@@ -116,7 +116,7 @@
warnings.simplefilter('always', category=SyntaxWarning)
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -153,7 +153,7 @@
eval("'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
- r"invalid octal escape sequence '\407'")
+ r"'\407' is an invalid octal escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -228,7 +228,7 @@
warnings.simplefilter('always', category=SyntaxWarning)
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -252,7 +252,7 @@
eval("b'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
- r"invalid octal escape sequence '\407'")
+ r"'\407' is an invalid octal escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
Index: Python-3.12.10/Lib/test/test_unparse.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_unparse.py 2025-05-17 12:00:01.749488032 +0000
+++ Python-3.12.10/Lib/test/test_unparse.py 2025-05-18 22:24:53.125415504 +0000
@@ -653,7 +653,7 @@
def test_backslash_in_format_spec(self):
import re
- msg = re.escape("invalid escape sequence '\\ '")
+ msg = re.escape("invalid escape sequence")
with self.assertWarnsRegex(SyntaxWarning, msg):
self.check_ast_roundtrip("""f"{x:\\ }" """)
self.check_ast_roundtrip("""f"{x:\\n}" """)
Index: Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst Index: Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
=================================================================== ===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-17 12:00:10.381159043 +0000 +++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-22 12:38:07.207057599 +0000
@@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error +Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
+handler. +handler.
Index: Python-3.12.10/Objects/bytesobject.c Index: Python-3.12.10/Objects/bytesobject.c
=================================================================== ===================================================================
--- Python-3.12.10.orig/Objects/bytesobject.c 2025-04-08 11:35:47.000000000 +0000 --- Python-3.12.10.orig/Objects/bytesobject.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Objects/bytesobject.c 2025-05-17 21:07:50.280395109 +0000 +++ Python-3.12.10/Objects/bytesobject.c 2025-05-22 12:38:07.207534041 +0000
@@ -1048,10 +1048,11 @@ @@ -1048,10 +1048,11 @@
} }
@@ -359,7 +290,26 @@ Index: Python-3.12.10/Objects/bytesobject.c
} }
*p++ = '\\'; *p++ = '\\';
s--; s--;
@@ -1168,17 +1172,18 @@ @@ -1162,23 +1166,37 @@
return NULL;
}
+// Export for binary compatibility.
+PyObject *_PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyBytes_DecodeEscape2(
+ s, len, errors,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t Py_UNUSED(unicode), Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding)) const char *Py_UNUSED(recode_encoding))
{ {
@@ -381,18 +331,16 @@ Index: Python-3.12.10/Objects/bytesobject.c
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid octal escape sequence '\\%.3s'", - "invalid octal escape sequence '\\%.3s'",
- first_invalid_escape) < 0) - first_invalid_escape) < 0)
+ "'\\%o' is an invalid octal escape sequence. ", + "invalid octal escape sequence '\\%o'",
+ first_invalid_escape_char) < 0) + first_invalid_escape_char) < 0)
{ {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
@@ -1186,8 +1191,8 @@ @@ -1187,7 +1205,7 @@
}
else { else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'", "invalid escape sequence '\\%c'",
- c) < 0) - c) < 0)
+ "'\\%c' is an invalid escape sequence. ",
+ first_invalid_escape_char) < 0) + first_invalid_escape_char) < 0)
{ {
Py_DECREF(result); Py_DECREF(result);
@@ -400,7 +348,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
Index: Python-3.12.10/Objects/unicodeobject.c Index: Python-3.12.10/Objects/unicodeobject.c
=================================================================== ===================================================================
--- Python-3.12.10.orig/Objects/unicodeobject.c 2025-04-08 11:35:47.000000000 +0000 --- Python-3.12.10.orig/Objects/unicodeobject.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Objects/unicodeobject.c 2025-05-17 12:12:06.666344175 +0000 +++ Python-3.12.10/Objects/unicodeobject.c 2025-05-22 12:38:07.209060814 +0000
@@ -6046,13 +6046,15 @@ @@ -6046,13 +6046,15 @@
/* --- Unicode Escape Codec ----------------------------------------------- */ /* --- Unicode Escape Codec ----------------------------------------------- */
@@ -461,7 +409,28 @@ Index: Python-3.12.10/Objects/unicodeobject.c
} }
WRITE_ASCII_CHAR('\\'); WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c); WRITE_CHAR(c);
@@ -6299,18 +6308,19 @@ @@ -6293,24 +6302,40 @@
return NULL;
}
+// Export for binary compatibility.
+PyObject *
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyUnicode_DecodeUnicodeEscapeInternal2(
+ s, size, errors, consumed,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *
_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
const char *errors, const char *errors,
Py_ssize_t *consumed) Py_ssize_t *consumed)
{ {
@@ -489,7 +458,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
{ {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
@@ -6319,7 +6329,7 @@ @@ -6319,7 +6344,7 @@
else { else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'", "invalid escape sequence '\\%c'",
@@ -501,27 +470,8 @@ Index: Python-3.12.10/Objects/unicodeobject.c
Index: Python-3.12.10/Parser/string_parser.c Index: Python-3.12.10/Parser/string_parser.c
=================================================================== ===================================================================
--- Python-3.12.10.orig/Parser/string_parser.c 2025-04-08 11:35:47.000000000 +0000 --- Python-3.12.10.orig/Parser/string_parser.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Parser/string_parser.c 2025-05-17 21:41:25.941179624 +0000 +++ Python-3.12.10/Parser/string_parser.c 2025-05-22 12:38:07.209950694 +0000
@@ -1,4 +1,6 @@ @@ -181,15 +181,18 @@
#include <Python.h>
+#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape()
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
#include "tokenizer.h"
#include "pegen.h"
@@ -25,9 +27,9 @@
int octal = ('4' <= c && c <= '7');
PyObject *msg =
octal
- ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
+ ? PyUnicode_FromFormat("'\\%.3s' is an invalid octal escape sequence. ",
first_invalid_escape)
- : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
+ : PyUnicode_FromFormat("'\\%c' is an invalid escape sequence. ", c);
if (msg == NULL) {
return -1;
}
@@ -181,15 +183,18 @@
len = p - buf; len = p - buf;
s = buf; s = buf;
@@ -546,7 +496,7 @@ Index: Python-3.12.10/Parser/string_parser.c
Py_XDECREF(u); Py_XDECREF(u);
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
@@ -202,14 +207,17 @@ @@ -202,14 +205,17 @@
static PyObject * static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{ {