2025-05-29 14:09:57 +00:00
committed by Git OBS Bridge
parent 01d7c30105
commit 8dd75ac7e9

View File

@@ -18,22 +18,22 @@ with _PyUnicode_DecodeUnicodeEscapeInternal().
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Include/cpython/bytesobject.h | 4
Include/cpython/unicodeobject.h | 13 ++
Lib/test/test_codeccallbacks.py | 40 ++++++
Lib/test/test_codecs.py | 52 ++++++--
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 54 +++++---
Objects/unicodeobject.c | 61 +++++++---
Parser/string_parser.c | 26 ++--
8 files changed, 195 insertions(+), 57 deletions(-)
Include/cpython/bytesobject.h | 4 ++
Include/cpython/unicodeobject.h | 13 ++++
Lib/test/test_codeccallbacks.py | 39 +++++++++++-
Lib/test/test_codecs.py | 52 +++++++++++++---
...-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 +
Objects/bytesobject.c | 54 ++++++++++------
Objects/unicodeobject.c | 61 +++++++++++++------
Parser/string_parser.c | 26 +++++---
8 files changed, 194 insertions(+), 57 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
Index: Python-3.12.10/Include/cpython/bytesobject.h
===================================================================
--- Python-3.12.10.orig/Include/cpython/bytesobject.h 2025-04-08 13:35:47.000000000 +0200
+++ Python-3.12.10/Include/cpython/bytesobject.h 2025-05-27 17:27:08.578524794 +0200
@@ -25,6 +25,10 @@
diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
index e982031c107de2..eef607a5760eda 100644
--- a/Include/cpython/bytesobject.h
+++ b/Include/cpython/bytesobject.h
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
int use_bytearray);
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
@@ -44,11 +44,11 @@ Index: Python-3.12.10/Include/cpython/bytesobject.h
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, const char **);
Index: Python-3.12.10/Include/cpython/unicodeobject.h
===================================================================
--- Python-3.12.10.orig/Include/cpython/unicodeobject.h 2025-04-08 13:35:47.000000000 +0200
+++ Python-3.12.10/Include/cpython/unicodeobject.h 2025-05-27 17:27:08.578940632 +0200
@@ -684,6 +684,19 @@
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index f177cd9e2af9de..cf38928686019b 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -684,6 +684,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
);
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars. */
@@ -68,10 +68,10 @@ Index: Python-3.12.10/Include/cpython/unicodeobject.h
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
const char *string, /* Unicode-Escape encoded string */
Py_ssize_t length, /* size of string */
Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codeccallbacks.py 2025-05-27 17:27:04.334768069 +0200
+++ Python-3.12.10/Lib/test/test_codeccallbacks.py 2025-05-27 21:33:39.393417492 +0200
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 4991330489d139..d85f609d806932 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1,6 +1,7 @@
import codecs
import html.entities
@@ -80,7 +80,7 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
import sys
import unicodedata
import unittest
@@ -1124,7 +1125,7 @@
@@ -1124,7 +1125,7 @@ def test_bug828737(self):
text = 'abc<def>ghi'*n
text.translate(charmap)
@@ -89,7 +89,7 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
baddata = [
("ascii", b"\xff"),
("utf-7", b"++"),
@@ -1159,6 +1160,43 @@
@@ -1159,6 +1160,42 @@ def mutating(exc):
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
@@ -114,7 +114,6 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
+ self.assertIn(msg, str(cm.warning))
+
+
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x0n\501', '\u0404\n\u0141', r"invalid octal escape sequence '\501'")
+ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
@@ -133,11 +132,11 @@ Index: Python-3.12.10/Lib/test/test_codeccallbacks.py
# issue32583
def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot
Index: Python-3.12.10/Lib/test/test_codecs.py
===================================================================
--- Python-3.12.10.orig/Lib/test/test_codecs.py 2025-05-27 17:27:04.357147552 +0200
+++ Python-3.12.10/Lib/test/test_codecs.py 2025-05-27 17:27:08.579902284 +0200
@@ -1196,23 +1196,39 @@
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index f683f069ae1397..2e64a52acbae3a 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1196,23 +1196,39 @@ def test_escape(self):
check(br"[\1010]", b"[A0]")
check(br"[\x41]", b"[A]")
check(br"[\x410]", b"[A0]")
@@ -182,7 +181,7 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
def test_errors(self):
decode = codecs.escape_decode
self.assertRaises(ValueError, decode, br"\x")
@@ -2479,24 +2495,40 @@
@@ -2479,24 +2495,40 @@ def test_escape_decode(self):
check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120")
@@ -228,18 +227,19 @@ Index: Python-3.12.10/Lib/test/test_codecs.py
def test_decode_errors(self):
decode = codecs.unicode_escape_decode
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
Index: Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.12.10/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-27 17:27:08.580640093 +0200
diff --git a/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
new file mode 100644
index 00000000000000..39d2f1e1a892cf
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
@@ -0,0 +1,2 @@
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
+handler.
Index: Python-3.12.10/Objects/bytesobject.c
===================================================================
--- Python-3.12.10.orig/Objects/bytesobject.c 2025-04-08 13:35:47.000000000 +0200
+++ Python-3.12.10/Objects/bytesobject.c 2025-05-27 21:08:20.056554751 +0200
@@ -1048,10 +1048,11 @@
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index f3a978c86c3606..dae84127a7df4b 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1048,10 +1048,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
}
/* Unescape a backslash-escaped string. */
@@ -253,7 +253,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
{
int c;
char *p;
@@ -1065,7 +1066,8 @@
@@ -1065,7 +1066,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
return NULL;
writer.overallocate = 1;
@@ -263,7 +263,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
end = s + len;
while (s < end) {
@@ -1103,9 +1105,10 @@
@@ -1103,9 +1105,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
c = (c<<3) + *s++ - '0';
}
if (c > 0377) {
@@ -277,7 +277,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
}
}
*p++ = c;
@@ -1146,9 +1149,10 @@
@@ -1146,9 +1149,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
break;
default:
@@ -291,7 +291,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
}
*p++ = '\\';
s--;
@@ -1162,23 +1166,37 @@
@@ -1162,23 +1166,37 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
return NULL;
}
@@ -337,7 +337,7 @@ Index: Python-3.12.10/Objects/bytesobject.c
{
Py_DECREF(result);
return NULL;
@@ -1187,7 +1205,7 @@
@@ -1187,7 +1205,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
@@ -346,11 +346,11 @@ Index: Python-3.12.10/Objects/bytesobject.c
{
Py_DECREF(result);
return NULL;
Index: Python-3.12.10/Objects/unicodeobject.c
===================================================================
--- Python-3.12.10.orig/Objects/unicodeobject.c 2025-04-08 13:35:47.000000000 +0200
+++ Python-3.12.10/Objects/unicodeobject.c 2025-05-27 21:08:02.745636052 +0200
@@ -6046,13 +6046,15 @@
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 05562ad9927989..5accbd6d1ddcbb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6046,13 +6046,15 @@ PyUnicode_AsUTF16String(PyObject *unicode)
/* --- Unicode Escape Codec ----------------------------------------------- */
PyObject *
@@ -368,7 +368,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
_PyUnicodeWriter writer;
const char *end;
PyObject *errorHandler = NULL;
@@ -6061,7 +6063,8 @@
@@ -6061,7 +6063,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
PyInterpreterState *interp = _PyInterpreterState_Get();
// so we can remember if we've seen an invalid escape char or not
@@ -378,7 +378,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
if (size == 0) {
if (consumed) {
@@ -6149,9 +6152,12 @@
@@ -6149,9 +6152,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
}
if (ch > 0377) {
@@ -394,7 +394,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
}
}
WRITE_CHAR(ch);
@@ -6252,9 +6258,12 @@
@@ -6252,9 +6258,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
goto error;
default:
@@ -410,7 +410,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6293,24 +6302,40 @@
@@ -6293,24 +6302,40 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
return NULL;
}
@@ -459,7 +459,7 @@ Index: Python-3.12.10/Objects/unicodeobject.c
{
Py_DECREF(result);
return NULL;
@@ -6319,7 +6344,7 @@
@@ -6319,7 +6344,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
@@ -468,11 +468,11 @@ Index: Python-3.12.10/Objects/unicodeobject.c
{
Py_DECREF(result);
return NULL;
Index: Python-3.12.10/Parser/string_parser.c
===================================================================
--- Python-3.12.10.orig/Parser/string_parser.c 2025-04-08 13:35:47.000000000 +0200
+++ Python-3.12.10/Parser/string_parser.c 2025-05-27 17:27:08.584587553 +0200
@@ -181,15 +181,18 @@
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 8607885f2e46bd..c4c41b07f6b63d 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -181,15 +181,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
len = p - buf;
s = buf;
@@ -497,7 +497,7 @@ Index: Python-3.12.10/Parser/string_parser.c
Py_XDECREF(u);
Py_DECREF(v);
return NULL;
@@ -202,14 +205,17 @@
@@ -202,14 +205,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{