Files
python39/CVE-2025-4516-DecodeError-handler.patch
Matej Cepl 64818e1d6b - Update to 3.9.23:
- Security
    - gh-135034: Fixes multiple issues that allowed tarfile
      extraction filters (filter="data" and filter="tar") to be
      bypassed using crafted symlinks and hard links.
    - Addresses CVE-2024-12718 (bsc#1244056), CVE-2025-4138
      (bsc#1244059), CVE-2025-4330 (bsc#1244060), and
      CVE-2025-4517 (bsc#1244032).
    - gh-133767: Fix use-after-free in the “unicode-escape”
      decoder with a non-“strict” error handler (CVE-2025-4516,
      bsc#1243273).
    - gh-128840: Short-circuit the processing of long IPv6
      addresses early in ipaddress to prevent excessive memory
      consumption and a minor denial-of-service.
    - gh-80222: Fix bug in the folding of quoted strings
      when flattening an email message using a modern email
      policy. Previously when a quoted string was folded so
      that it spanned more than one line, the surrounding
      quotes and internal escapes would be omitted. This could
      theoretically be used to spoof header lines using a
      carefully constructed quoted string if the resulting
      rendered email was transmitted or re-parsed.
  - Library
    - gh-128840: Fix parsing long IPv6 addresses with embedded
      IPv4 address.
    - gh-134062: ipaddress: fix collisions in __hash__() for
      IPv4Network and IPv6Network objects.
    - gh-123409: Fix ipaddress.IPv6Address.reverse_pointer output
      according to RFC 3596, §2.5. Patch by Bénédikt Tran.
    - bpo-43633: Improve the textual representation of

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python39?expand=0&rev=233
2025-06-09 16:29:52 +00:00

427 lines
19 KiB
Diff

From 0d5d68f7075788b6912f8632dc841dca97ece409 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 20 May 2025 15:46:57 +0300
Subject: [PATCH] [3.9] gh-133767: Fix use-after-free in the unicode-escape
decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will became invalid
after destroying that temporary bytes object. So we need other way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
(cherry picked from commit a75953b347716fff694aa59a7c7c2489fa50d1f5)
(cherry picked from commit 0c33e5baedf18ebcb04bc41dff7cfc614d5ea5fe)
(cherry picked from commit 8b528cacbbde60504f6ac62784d04889d285f18b)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Include/cpython/bytesobject.h | 4
Include/cpython/unicodeobject.h | 13 ++
Lib/test/test_codeccallbacks.py | 37 ++++++++
Lib/test/test_codecs.py | 39 ++++++--
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 40 ++++++--
Objects/unicodeobject.c | 45 +++++++---
Parser/pegen/parse_string.c | 24 +++--
8 files changed, 164 insertions(+), 40 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
--- a/Include/cpython/bytesobject.h
+++ b/Include/cpython/bytesobject.h
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
int use_bytearray);
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
+ const char *,
+ int *, const char **);
+// Export for binary compatibility.
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, const char **);
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -866,6 +866,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeU
);
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed, /* bytes consumed */
+ int *first_invalid_escape_char, /* on return, if not -1, contain the first
+ invalid escaped char (<= 0xff) or invalid
+ octal escape (> 0xff) in string. */
+ const char **first_invalid_escape_ptr); /* on return, if not NULL, may
+ point to the first invalid escaped
+ char in string.
+ May be NULL if errors is not NULL. */
+// Export for binary compatibility.
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
const char *string, /* Unicode-Escape encoded string */
Py_ssize_t length, /* size of string */
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1,6 +1,7 @@
import codecs
import html.entities
import itertools
+import re
import sys
import unicodedata
import unittest
@@ -1124,7 +1125,7 @@ class CodecCallbackTest(unittest.TestCas
text = 'abc<def>ghi'*n
text.translate(charmap)
- def test_mutatingdecodehandler(self):
+ def test_mutating_decode_handler(self):
baddata = [
("ascii", b"\xff"),
("utf-7", b"++"),
@@ -1159,6 +1160,40 @@ class CodecCallbackTest(unittest.TestCas
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+ def test_mutating_decode_handler_unicode_escape(self):
+ decode = codecs.unicode_escape_decode
+ def mutating(exc):
+ if isinstance(exc, UnicodeDecodeError):
+ r = data.get(exc.object[:exc.end])
+ if r is not None:
+ exc.object = r[0] + exc.object[exc.end:]
+ return ('\u0404', r[1])
+ raise AssertionError("don't know how to handle %r" % exc)
+
+ codecs.register_error('test.mutating2', mutating)
+ data = {
+ br'\x0': (b'\\', 0),
+ br'\x3': (b'xxx\\', 3),
+ br'\x5': (b'x\\', 1),
+ }
+ def check(input, expected, msg):
+ with self.assertWarns(DeprecationWarning) as cm:
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
+ self.assertIn(msg, str(cm.warning))
+
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
+
+ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
+ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
+
+ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
+ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
+
# issue32583
def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1178,20 +1178,32 @@ class EscapeDecodeTest(unittest.TestCase
check(br"[\501]", b"[A]")
check(br"[\x41]", b"[A]")
check(br"[\x410]", b"[A0]")
+
+ def test_warnings(self):
+ decode = codecs.escape_decode
+ check = coding_checker(self, decode)
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, b"\\" + b)
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), b"\\" + b.upper())
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\8'"):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", b"\\\xfa")
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
+
def test_errors(self):
decode = codecs.escape_decode
self.assertRaises(ValueError, decode, br"\x")
@@ -2393,20 +2405,31 @@ class UnicodeEscapeTest(ReadTest, unitte
check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120")
+
+ def test_decode_warnings(self):
+ decode = codecs.unicode_escape_decode
+ check = coding_checker(self, decode)
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtuvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\%c'" % i):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\%c'" % (i-32)):
check(b"\\" + b.upper(), "\\" + chr(i-32))
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\8'"):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\\xfa'") as cm:
check(b"\\\xfa", "\\\xfa")
+ with self.assertWarnsRegex(DeprecationWarning,
+ r"invalid escape sequence '\\z'"):
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
def test_decode_errors(self):
decode = codecs.unicode_escape_decode
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
@@ -0,0 +1,2 @@
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
+handler.
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1060,10 +1060,11 @@ _PyBytes_FormatEx(const char *format, Py
}
/* Unescape a backslash-escaped string. */
-PyObject *_PyBytes_DecodeEscape(const char *s,
+PyObject *_PyBytes_DecodeEscape2(const char *s,
Py_ssize_t len,
const char *errors,
- const char **first_invalid_escape)
+ int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{
int c;
char *p;
@@ -1077,7 +1078,8 @@ PyObject *_PyBytes_DecodeEscape(const ch
return NULL;
writer.overallocate = 1;
- *first_invalid_escape = NULL;
+ *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
end = s + len;
while (s < end) {
@@ -1152,9 +1154,10 @@ PyObject *_PyBytes_DecodeEscape(const ch
break;
default:
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = (unsigned char)s[-1];
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
}
*p++ = '\\';
s--;
@@ -1168,21 +1171,36 @@ PyObject *_PyBytes_DecodeEscape(const ch
return NULL;
}
+// Export for binary compatibility.
+PyObject *_PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyBytes_DecodeEscape2(
+ s, len, errors,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding))
{
- const char* first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
- &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL)
return NULL;
- if (first_invalid_escape != NULL) {
+ if (first_invalid_escape_char != -1) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
- (unsigned char)*first_invalid_escape) < 0) {
+ first_invalid_escape_char) < 0) {
Py_DECREF(result);
return NULL;
}
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6278,20 +6278,23 @@ PyUnicode_AsUTF16String(PyObject *unicod
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed,
- const char **first_invalid_escape)
+ int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{
const char *starts = s;
+ const char *initial_starts = starts;
_PyUnicodeWriter writer;
const char *end;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
// so we can remember if we've seen an invalid escape char or not
- *first_invalid_escape = NULL;
+ *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
if (size == 0) {
if (consumed) {
@@ -6474,9 +6477,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(c
goto error;
default:
- if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */
+ if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape_char = c;
+ if (starts == initial_starts) {
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
+ }
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
@@ -6515,22 +6521,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(c
return NULL;
}
+// Export for binary compatibility.
+PyObject *
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed,
+ const char **first_invalid_escape)
+{
+ int first_invalid_escape_char;
+ return _PyUnicode_DecodeUnicodeEscapeInternal2(
+ s, size, errors, consumed,
+ &first_invalid_escape_char,
+ first_invalid_escape);
+}
+
PyObject *
_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
const char *errors,
Py_ssize_t *consumed)
{
- const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
consumed,
- &first_invalid_escape);
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL)
return NULL;
- if (first_invalid_escape != NULL) {
+ if (first_invalid_escape_char != -1) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
- (unsigned char)*first_invalid_escape) < 0) {
+ first_invalid_escape_char) < 0) {
Py_DECREF(result);
return NULL;
}
--- a/Parser/pegen/parse_string.c
+++ b/Parser/pegen/parse_string.c
@@ -119,12 +119,15 @@ decode_unicode_with_escapes(Parser *pars
len = p - buf;
s = buf;
- const char *first_invalid_escape;
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
- if (v != NULL && first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
- /* We have not decref u before because first_invalid_escape points
+ if (v != NULL && first_invalid_escape_ptr != NULL) {
+ if (warn_invalid_escape_sequence(parser, *first_invalid_escape_ptr, t) < 0) {
+ /* We have not decref u before because first_invalid_escape_ptr points
inside u. */
Py_XDECREF(u);
Py_DECREF(v);
@@ -138,14 +141,17 @@ decode_unicode_with_escapes(Parser *pars
static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{
- const char *first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL) {
return NULL;
}
- if (first_invalid_escape != NULL) {
- if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
+ if (first_invalid_escape_ptr != NULL) {
+ if (warn_invalid_escape_sequence(p, *first_invalid_escape_ptr, t) < 0) {
Py_DECREF(result);
return NULL;
}