2025-05-22 12:39:01 +00:00
committed by Git OBS Bridge
parent af89117d93
commit 6441e5a86b

View File

@@ -1,7 +1,8 @@
From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001
From a75953b347716fff694aa59a7c7c2489fa50d1f5 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 4 Feb 2025 11:44:39 +0200
Subject: Fix use-after-free in the unicode-escape decoder with error handler
Date: Tue, 20 May 2025 15:46:57 +0300
Subject: [PATCH] [3.12] gh-133767: Fix use-after-free in the unicode-escape
decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
@@ -12,47 +13,45 @@ the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have this issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
---
Include/internal/pycore_bytesobject.h | 5
Include/internal/pycore_unicodeobject.h | 16 +++
Lib/test/test_codeccallbacks.py | 39 +++++++
Lib/test/test_codecs.py | 52 ++++++++--
Lib/test/test_codeop.py | 4
Lib/test/test_string_literals.py | 8 -
Lib/test/test_unparse.py | 2
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 43 ++++----
Objects/unicodeobject.c | 46 +++++---
Parser/string_parser.c | 32 +++---
11 files changed, 182 insertions(+), 67 deletions(-)
(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
Index: Python-3.12.10/Include/internal/pycore_bytesobject.h
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Include/cpython/bytesobject.h | 4
Include/cpython/unicodeobject.h | 13 ++
Lib/test/test_codeccallbacks.py | 39 ++++++
Lib/test/test_codecs.py | 52 ++++++--
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 54 +++++---
Objects/unicodeobject.c | 61 +++++++---
Parser/string_parser.c | 26 ++--
8 files changed, 194 insertions(+), 57 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
Index: Python-3.12.10/Include/cpython/bytesobject.h
===================================================================
--- Python-3.12.10.orig/Include/internal/pycore_bytesobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/internal/pycore_bytesobject.h 2025-05-17 12:02:30.561879645 +0000
@@ -8,6 +8,11 @@
# error "this header requires Py_BUILD_CORE define"
#endif
--- Python-3.12.10.orig/Include/cpython/bytesobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/cpython/bytesobject.h 2025-05-22 12:38:07.205729906 +0000
@@ -25,6 +25,10 @@
int use_bytearray);
+// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
+// Export for test_peg_generator.
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
+ const char *,
+ int *, const char **);
+// Export for binary compatibility.
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, const char **);
/* Substring Search.
Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h
Index: Python-3.12.10/Include/cpython/unicodeobject.h
===================================================================
--- Python-3.12.10.orig/Include/internal/pycore_unicodeobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/internal/pycore_unicodeobject.h 2025-05-17 12:05:20.771043709 +0000
@@ -79,6 +79,22 @@
// Like PyUnicode_AsUTF8(), but check for embedded null characters.
extern const char* _PyUnicode_AsUTF8NoNUL(PyObject *);
+// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+// chars.
+// Export for test_peg_generator.
--- Python-3.12.10.orig/Include/cpython/unicodeobject.h 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Include/cpython/unicodeobject.h 2025-05-22 12:38:07.205905378 +0000
@@ -684,6 +684,19 @@
);
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
@@ -65,14 +64,14 @@ Index: Python-3.12.10/Include/internal/pycore_unicodeobject.h
+ point to the first invalid escaped
+ char in string.
+ May be NULL if errors is not NULL. */
+
#ifdef __cplusplus
}
+// Export for binary compatibility.
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
const char *string, /* Unicode-Escape encoded string */
Py_ssize_t length, /* size of string */
Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-17 12:00:00.337816215 +0000
+++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-18 22:41:55.726670925 +0000
--- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-22 12:37:58.935377659 +0000
+++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-22 12:38:07.206131787 +0000
@@ -1,6 +1,7 @@
import codecs
import html.entities
@@ -115,28 +114,28 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
+ self.assertIn(msg, str(cm.warning))
+
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence")
+ check(br'\x0n\501', '\u0404\n\u0141', r'invalid octal escape sequence')
+ check(br'\x0z', '\u0404\\z', r'invalid escape sequence')
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
+ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
+
+ check(br'\x3n\zr', '\u0404\n\\zr', r'invalid escape sequence')
+ check(br'\x3zr', '\u0404\\zr', r'invalid escape sequence')
+ check(br'\x3z5', '\u0404\\z5', r'invalid escape sequence')
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'invalid escape sequence')
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'invalid escape sequence')
+ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
+
+ check(br'\x5n\z', '\u0404\n\\z', r'invalid escape sequence')
+ check(br'\x5n\501', '\u0404\n\u0141', r'invalid octal escape sequence')
+ check(br'\x5z', '\u0404\\z', r'invalid escape sequence')
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'invalid escape sequence')
+ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x5n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
+ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
+
# issue32583
def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot
Index: Python-3.12.10/Lib/test/test_codecs.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-17 12:00:00.357214034 +0000
+++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-19 06:18:25.842127040 +0000
--- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-22 12:37:58.952566393 +0000
+++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-22 12:38:07.206633606 +0000
@@ -1196,23 +1196,39 @@
check(br"[\1010]", b"[A0]")
check(br"[\x41]", b"[A]")
@@ -150,33 +149,33 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
if b not in b'abfnrtvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"'\\%c' is an invalid escape sequence" % i):
+ r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, b"\\" + b)
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence"):
+ r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), b"\\" + b.upper())
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"'\\8' is an invalid escape sequence"):
+ r"invalid escape sequence '\\8'"):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence') as cm:
+ r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000):
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'):
+ r"invalid octal escape sequence '\\%o'" % i):
check(rb'\%o' % i, bytes([i & 0o377]))
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'):
+ r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'):
+ r"invalid octal escape sequence '\\501'"):
+ self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
+
def test_errors(self):
@@ -195,118 +194,50 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
if b not in b'abfnrtuvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'):
+ r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ 'invalid escape sequence'):
+ r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), "\\" + chr(i-32))
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'):
+ r"invalid escape sequence '\\8'"):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence') as cm:
+ r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000):
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'):
+ r"invalid octal escape sequence '\\%o'" % i):
check(rb'\%o' % i, chr(i))
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid escape sequence'):
+ r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
+ with self.assertWarnsRegex(DeprecationWarning,
+ r'invalid octal escape sequence'):
+ r"invalid octal escape sequence '\\501'"):
+ self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
+
def test_decode_errors(self):
decode = codecs.unicode_escape_decode
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
Index: Python-3.12.10/Lib/test/test_codeop.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codeop.py 2025-05-17 12:00:00.359727062 +0000
+++ Python-3.12.10/Lib/test/test_codeop.py 2025-05-19 14:39:26.700464317 +0000
@@ -281,8 +281,8 @@
def test_warning(self):
# Test that the warning is only returned once.
with warnings_helper.check_warnings(
- ('"is" with \'str\' literal', SyntaxWarning),
- ("invalid escape sequence", SyntaxWarning),
+ (r'"is" with.*literal', SyntaxWarning),
+ (r'invalid escape sequence', SyntaxWarning),
) as w:
compile_command(r"'\e' is 0")
self.assertEqual(len(w.warnings), 2)
Index: Python-3.12.10/Lib/test/test_string_literals.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_string_literals.py 2025-05-17 12:00:01.489624803 +0000
+++ Python-3.12.10/Lib/test/test_string_literals.py 2025-05-18 22:43:10.029314378 +0000
@@ -116,7 +116,7 @@
warnings.simplefilter('always', category=SyntaxWarning)
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -153,7 +153,7 @@
eval("'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
- r"invalid octal escape sequence '\407'")
+ r"'\407' is an invalid octal escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -228,7 +228,7 @@
warnings.simplefilter('always', category=SyntaxWarning)
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
+ self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
@@ -252,7 +252,7 @@
eval("b'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
- r"invalid octal escape sequence '\407'")
+ r"'\407' is an invalid octal escape sequence. ")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
Index: Python-3.12.10/Lib/test/test_unparse.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_unparse.py 2025-05-17 12:00:01.749488032 +0000
+++ Python-3.12.10/Lib/test/test_unparse.py 2025-05-18 22:24:53.125415504 +0000
@@ -653,7 +653,7 @@
def test_backslash_in_format_spec(self):
import re
- msg = re.escape("invalid escape sequence '\\ '")
+ msg = re.escape("invalid escape sequence")
with self.assertWarnsRegex(SyntaxWarning, msg):
self.check_ast_roundtrip("""f"{x:\\ }" """)
self.check_ast_roundtrip("""f"{x:\\n}" """)
Index: Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-17 12:00:10.381159043 +0000
+++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-22 12:38:07.207057599 +0000
@@ -0,0 +1,2 @@
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
+handler.
Index: Python-3.12.10/Objects/bytesobject.c
===================================================================
--- Python-3.12.10.orig/Objects/bytesobject.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Objects/bytesobject.c 2025-05-17 21:07:50.280395109 +0000
+++ Python-3.12.10/Objects/bytesobject.c 2025-05-22 12:38:07.207534041 +0000
@@ -1048,10 +1048,11 @@
}
@@ -359,7 +290,26 @@ Index: Python-3.12.10/Objects/bytesobject.c
}
*p++ = '\\';
s--;
@@ -1168,17 +1172,18 @@
@@ -1162,23 +1166,37 @@
return NULL;
}
+// Export for binary compatibility.
+PyObject *_PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyBytes_DecodeEscape2(
+ s, len, errors,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding))
{
@@ -381,18 +331,16 @@ Index: Python-3.12.10/Objects/bytesobject.c
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid octal escape sequence '\\%.3s'",
- first_invalid_escape) < 0)
+ "'\\%o' is an invalid octal escape sequence. ",
+ "invalid octal escape sequence '\\%o'",
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
@@ -1186,8 +1191,8 @@
}
@@ -1187,7 +1205,7 @@
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'",
"invalid escape sequence '\\%c'",
- c) < 0)
+ "'\\%c' is an invalid escape sequence. ",
+ first_invalid_escape_char) < 0)
{
Py_DECREF(result);
@@ -400,7 +348,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
Index: Python-3.12.10/Objects/unicodeobject.c
===================================================================
--- Python-3.12.10.orig/Objects/unicodeobject.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Objects/unicodeobject.c 2025-05-17 12:12:06.666344175 +0000
+++ Python-3.12.10/Objects/unicodeobject.c 2025-05-22 12:38:07.209060814 +0000
@@ -6046,13 +6046,15 @@
/* --- Unicode Escape Codec ----------------------------------------------- */
@@ -461,7 +409,28 @@ Index: Python-3.12.10/Objects/unicodeobject.c
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6299,18 +6308,19 @@
@@ -6293,24 +6302,40 @@
return NULL;
}
+// Export for binary compatibility.
+PyObject *
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyUnicode_DecodeUnicodeEscapeInternal2(
+ s, size, errors, consumed,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *
_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{
@@ -489,7 +458,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
{
Py_DECREF(result);
return NULL;
@@ -6319,7 +6329,7 @@
@@ -6319,7 +6344,7 @@
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
@@ -501,27 +470,8 @@ Index: Python-3.12.10/Objects/unicodeobject.c
Index: Python-3.12.10/Parser/string_parser.c
===================================================================
--- Python-3.12.10.orig/Parser/string_parser.c 2025-04-08 11:35:47.000000000 +0000
+++ Python-3.12.10/Parser/string_parser.c 2025-05-17 21:41:25.941179624 +0000
@@ -1,4 +1,6 @@
#include <Python.h>
+#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape()
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
#include "tokenizer.h"
#include "pegen.h"
@@ -25,9 +27,9 @@
int octal = ('4' <= c && c <= '7');
PyObject *msg =
octal
- ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
+ ? PyUnicode_FromFormat("'\\%.3s' is an invalid octal escape sequence. ",
first_invalid_escape)
- : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
+ : PyUnicode_FromFormat("'\\%c' is an invalid escape sequence. ", c);
if (msg == NULL) {
return -1;
}
@@ -181,15 +183,18 @@
+++ Python-3.12.10/Parser/string_parser.c 2025-05-22 12:38:07.209950694 +0000
@@ -181,15 +181,18 @@
len = p - buf;
s = buf;
@@ -546,7 +496,7 @@ Index: Python-3.12.10/Parser/string_parser.c
Py_XDECREF(u);
Py_DECREF(v);
return NULL;
@@ -202,14 +207,17 @@
@@ -202,14 +205,17 @@
static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{