forked from pool/python39
- Security
- gh-135034: Fixes multiple issues that allowed tarfile
extraction filters (filter="data" and filter="tar") to be
bypassed using crafted symlinks and hard links.
- Addresses CVE-2024-12718 (bsc#1244056), CVE-2025-4138
(bsc#1244059), CVE-2025-4330 (bsc#1244060), and
CVE-2025-4517 (bsc#1244032).
- gh-133767: Fix use-after-free in the “unicode-escape”
decoder with a non-“strict” error handler (CVE-2025-4516,
bsc#1243273).
- gh-128840: Short-circuit the processing of long IPv6
addresses early in ipaddress to prevent excessive memory
consumption and a minor denial-of-service.
- gh-80222: Fix bug in the folding of quoted strings
when flattening an email message using a modern email
policy. Previously when a quoted string was folded so
that it spanned more than one line, the surrounding
quotes and internal escapes would be omitted. This could
theoretically be used to spoof header lines using a
carefully constructed quoted string if the resulting
rendered email was transmitted or re-parsed.
- Library
- gh-128840: Fix parsing long IPv6 addresses with embedded
IPv4 address.
- gh-134062: ipaddress: fix collisions in __hash__() for
IPv4Network and IPv6Network objects.
- gh-123409: Fix ipaddress.IPv6Address.reverse_pointer output
according to RFC 3596, §2.5. Patch by Bénédikt Tran.
- bpo-43633: Improve the textual representation of
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python39?expand=0&rev=233
427 lines
19 KiB
Diff
427 lines
19 KiB
Diff
From 0d5d68f7075788b6912f8632dc841dca97ece409 Mon Sep 17 00:00:00 2001
|
|
From: Serhiy Storchaka <storchaka@gmail.com>
|
|
Date: Tue, 20 May 2025 15:46:57 +0300
|
|
Subject: [PATCH] [3.9] gh-133767: Fix use-after-free in the unicode-escape
|
|
decoder with an error handler (GH-129648) (GH-133944)
|
|
|
|
If the error handler is used, a new bytes object is created to set as
|
|
the object attribute of UnicodeDecodeError, and that bytes object then
|
|
replaces the original data. A pointer to the decoded data will become invalid
|
|
after destroying that temporary bytes object. So we need another way to return
|
|
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
|
|
_PyBytes_DecodeEscape() does not have such an issue, because it does not
|
|
use the error handlers registry, but it should be changed for compatibility
|
|
with _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
|
|
(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
|
|
(cherry picked from commit a75953b347716fff694aa59a7c7c2489fa50d1f5)
|
|
(cherry picked from commit 0c33e5baedf18ebcb04bc41dff7cfc614d5ea5fe)
|
|
(cherry picked from commit 8b528cacbbde60504f6ac62784d04889d285f18b)
|
|
|
|
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
|
|
---
|
|
Include/cpython/bytesobject.h | 4
|
|
Include/cpython/unicodeobject.h | 13 ++
|
|
Lib/test/test_codeccallbacks.py | 37 ++++++++
|
|
Lib/test/test_codecs.py | 39 ++++++--
|
|
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
|
|
Objects/bytesobject.c | 40 ++++++--
|
|
Objects/unicodeobject.c | 45 +++++++---
|
|
Parser/pegen/parse_string.c | 24 +++--
|
|
8 files changed, 164 insertions(+), 40 deletions(-)
|
|
create mode 100644 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
|
|
|
|
--- a/Include/cpython/bytesobject.h
|
|
+++ b/Include/cpython/bytesobject.h
|
|
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
|
|
int use_bytearray);
|
|
|
|
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
|
|
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
|
|
+ const char *,
|
|
+ int *, const char **);
|
|
+// Export for binary compatibility.
|
|
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
|
const char *, const char **);
|
|
|
|
--- a/Include/cpython/unicodeobject.h
|
|
+++ b/Include/cpython/unicodeobject.h
|
|
@@ -866,6 +866,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeU
|
|
);
|
|
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
|
|
chars. */
|
|
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
|
|
+ const char *string, /* Unicode-Escape encoded string */
|
|
+ Py_ssize_t length, /* size of string */
|
|
+ const char *errors, /* error handling */
|
|
+ Py_ssize_t *consumed, /* bytes consumed */
|
|
+ int *first_invalid_escape_char, /* on return, if not -1, contain the first
|
|
+ invalid escaped char (<= 0xff) or invalid
|
|
+ octal escape (> 0xff) in string. */
|
|
+ const char **first_invalid_escape_ptr); /* on return, if not NULL, may
|
|
+ point to the first invalid escaped
|
|
+ char in string.
|
|
+ May be NULL if errors is not NULL. */
|
|
+// Export for binary compatibility.
|
|
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
|
|
const char *string, /* Unicode-Escape encoded string */
|
|
Py_ssize_t length, /* size of string */
|
|
--- a/Lib/test/test_codeccallbacks.py
|
|
+++ b/Lib/test/test_codeccallbacks.py
|
|
@@ -1,6 +1,7 @@
|
|
import codecs
|
|
import html.entities
|
|
import itertools
|
|
+import re
|
|
import sys
|
|
import unicodedata
|
|
import unittest
|
|
@@ -1124,7 +1125,7 @@ class CodecCallbackTest(unittest.TestCas
|
|
text = 'abc<def>ghi'*n
|
|
text.translate(charmap)
|
|
|
|
- def test_mutatingdecodehandler(self):
|
|
+ def test_mutating_decode_handler(self):
|
|
baddata = [
|
|
("ascii", b"\xff"),
|
|
("utf-7", b"++"),
|
|
@@ -1159,6 +1160,40 @@ class CodecCallbackTest(unittest.TestCas
|
|
for (encoding, data) in baddata:
|
|
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
|
|
|
+ def test_mutating_decode_handler_unicode_escape(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ def mutating(exc):
|
|
+ if isinstance(exc, UnicodeDecodeError):
|
|
+ r = data.get(exc.object[:exc.end])
|
|
+ if r is not None:
|
|
+ exc.object = r[0] + exc.object[exc.end:]
|
|
+ return ('\u0404', r[1])
|
|
+ raise AssertionError("don't know how to handle %r" % exc)
|
|
+
|
|
+ codecs.register_error('test.mutating2', mutating)
|
|
+ data = {
|
|
+ br'\x0': (b'\\', 0),
|
|
+ br'\x3': (b'xxx\\', 3),
|
|
+ br'\x5': (b'x\\', 1),
|
|
+ }
|
|
+ def check(input, expected, msg):
|
|
+ with self.assertWarns(DeprecationWarning) as cm:
|
|
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
|
|
+ self.assertIn(msg, str(cm.warning))
|
|
+
|
|
+ check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
|
|
+ check(br'\x0z', '\u0404\\z', r"invalid escape sequence '\z'")
|
|
+
|
|
+ check(br'\x3n\zr', '\u0404\n\\zr', r"invalid escape sequence '\z'")
|
|
+ check(br'\x3zr', '\u0404\\zr', r"invalid escape sequence '\z'")
|
|
+ check(br'\x3z5', '\u0404\\z5', r"invalid escape sequence '\z'")
|
|
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r"invalid escape sequence '\z'")
|
|
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r"invalid escape sequence '\z'")
|
|
+
|
|
+ check(br'\x5n\z', '\u0404\n\\z', r"invalid escape sequence '\z'")
|
|
+ check(br'\x5z', '\u0404\\z', r"invalid escape sequence '\z'")
|
|
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r"invalid escape sequence '\z'")
|
|
+
|
|
# issue32583
|
|
def test_crashing_decode_handler(self):
|
|
# better generating one more character to fill the extra space slot
|
|
--- a/Lib/test/test_codecs.py
|
|
+++ b/Lib/test/test_codecs.py
|
|
@@ -1178,20 +1178,32 @@ class EscapeDecodeTest(unittest.TestCase
|
|
check(br"[\501]", b"[A]")
|
|
check(br"[\x41]", b"[A]")
|
|
check(br"[\x410]", b"[A0]")
|
|
+
|
|
+ def test_warnings(self):
|
|
+ decode = codecs.escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\%c'" % i):
|
|
check(b"\\" + b, b"\\" + b)
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\%c'" % (i-32)):
|
|
check(b"\\" + b.upper(), b"\\" + b.upper())
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\8'"):
|
|
check(br"\8", b"\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", b"\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\\xfa'") as cm:
|
|
check(b"\\\xfa", b"\\\xfa")
|
|
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\z'"):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
|
|
+
|
|
def test_errors(self):
|
|
decode = codecs.escape_decode
|
|
self.assertRaises(ValueError, decode, br"\x")
|
|
@@ -2393,20 +2405,31 @@ class UnicodeEscapeTest(ReadTest, unitte
|
|
check(br"[\x410]", "[A0]")
|
|
check(br"\u20ac", "\u20ac")
|
|
check(br"\U0001d120", "\U0001d120")
|
|
+
|
|
+ def test_decode_warnings(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtuvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\%c'" % i):
|
|
check(b"\\" + b, "\\" + chr(i))
|
|
if b.upper() not in b'UN':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\%c'" % (i-32)):
|
|
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\8'"):
|
|
check(br"\8", "\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", "\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\\xfa'") as cm:
|
|
check(b"\\\xfa", "\\\xfa")
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r"invalid escape sequence '\\z'"):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
|
|
|
|
def test_decode_errors(self):
|
|
decode = codecs.unicode_escape_decode
|
|
--- /dev/null
|
|
+++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
|
|
@@ -0,0 +1,2 @@
|
|
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
|
|
+handler.
|
|
--- a/Objects/bytesobject.c
|
|
+++ b/Objects/bytesobject.c
|
|
@@ -1060,10 +1060,11 @@ _PyBytes_FormatEx(const char *format, Py
|
|
}
|
|
|
|
/* Unescape a backslash-escaped string. */
|
|
-PyObject *_PyBytes_DecodeEscape(const char *s,
|
|
+PyObject *_PyBytes_DecodeEscape2(const char *s,
|
|
Py_ssize_t len,
|
|
const char *errors,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape_char,
|
|
+ const char **first_invalid_escape_ptr)
|
|
{
|
|
int c;
|
|
char *p;
|
|
@@ -1077,7 +1078,8 @@ PyObject *_PyBytes_DecodeEscape(const ch
|
|
return NULL;
|
|
writer.overallocate = 1;
|
|
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape_char = -1;
|
|
+ *first_invalid_escape_ptr = NULL;
|
|
|
|
end = s + len;
|
|
while (s < end) {
|
|
@@ -1152,9 +1154,10 @@ PyObject *_PyBytes_DecodeEscape(const ch
|
|
break;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = (unsigned char)s[-1];
|
|
+ /* Back up one char, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 1;
|
|
}
|
|
*p++ = '\\';
|
|
s--;
|
|
@@ -1168,21 +1171,36 @@ PyObject *_PyBytes_DecodeEscape(const ch
|
|
return NULL;
|
|
}
|
|
|
|
+// Export for binary compatibility.
|
|
+PyObject *_PyBytes_DecodeEscape(const char *s,
|
|
+ Py_ssize_t len,
|
|
+ const char *errors,
|
|
+ const char **first_invalid_escape)
|
|
+{
|
|
+ int first_invalid_escape_char;
|
|
+ return _PyBytes_DecodeEscape2(
|
|
+ s, len, errors,
|
|
+ &first_invalid_escape_char,
|
|
+ first_invalid_escape);
|
|
+}
|
|
+
|
|
PyObject *PyBytes_DecodeEscape(const char *s,
|
|
Py_ssize_t len,
|
|
const char *errors,
|
|
Py_ssize_t Py_UNUSED(unicode),
|
|
const char *Py_UNUSED(recode_encoding))
|
|
{
|
|
- const char* first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
|
|
- &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
+ if (first_invalid_escape_char != -1) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"invalid escape sequence '\\%c'",
|
|
- (unsigned char)*first_invalid_escape) < 0) {
|
|
+ first_invalid_escape_char) < 0) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|
|
--- a/Objects/unicodeobject.c
|
|
+++ b/Objects/unicodeobject.c
|
|
@@ -6278,20 +6278,23 @@ PyUnicode_AsUTF16String(PyObject *unicod
|
|
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
|
|
|
|
PyObject *
|
|
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
|
|
Py_ssize_t size,
|
|
const char *errors,
|
|
Py_ssize_t *consumed,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape_char,
|
|
+ const char **first_invalid_escape_ptr)
|
|
{
|
|
const char *starts = s;
|
|
+ const char *initial_starts = starts;
|
|
_PyUnicodeWriter writer;
|
|
const char *end;
|
|
PyObject *errorHandler = NULL;
|
|
PyObject *exc = NULL;
|
|
|
|
// so we can remember if we've seen an invalid escape char or not
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape_char = -1;
|
|
+ *first_invalid_escape_ptr = NULL;
|
|
|
|
if (size == 0) {
|
|
if (consumed) {
|
|
@@ -6474,9 +6477,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(c
|
|
goto error;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = c;
|
|
+ if (starts == initial_starts) {
|
|
+ /* Back up one char, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 1;
|
|
+ }
|
|
}
|
|
WRITE_ASCII_CHAR('\\');
|
|
WRITE_CHAR(c);
|
|
@@ -6515,22 +6521,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(c
|
|
return NULL;
|
|
}
|
|
|
|
+// Export for binary compatibility.
|
|
+PyObject *
|
|
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|
+ Py_ssize_t size,
|
|
+ const char *errors,
|
|
+ Py_ssize_t *consumed,
|
|
+ const char **first_invalid_escape)
|
|
+{
|
|
+ int first_invalid_escape_char;
|
|
+ return _PyUnicode_DecodeUnicodeEscapeInternal2(
|
|
+ s, size, errors, consumed,
|
|
+ &first_invalid_escape_char,
|
|
+ first_invalid_escape);
|
|
+}
|
|
+
|
|
PyObject *
|
|
_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
|
|
Py_ssize_t size,
|
|
const char *errors,
|
|
Py_ssize_t *consumed)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
|
|
consumed,
|
|
- &first_invalid_escape);
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
+ if (first_invalid_escape_char != -1) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"invalid escape sequence '\\%c'",
|
|
- (unsigned char)*first_invalid_escape) < 0) {
|
|
+ first_invalid_escape_char) < 0) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|
|
--- a/Parser/pegen/parse_string.c
|
|
+++ b/Parser/pegen/parse_string.c
|
|
@@ -119,12 +119,15 @@ decode_unicode_with_escapes(Parser *pars
|
|
len = p - buf;
|
|
s = buf;
|
|
|
|
- const char *first_invalid_escape;
|
|
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
|
|
- if (v != NULL && first_invalid_escape != NULL) {
|
|
- if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
|
|
- /* We have not decref u before because first_invalid_escape points
|
|
+ if (v != NULL && first_invalid_escape_ptr != NULL) {
|
|
+ if (warn_invalid_escape_sequence(parser, *first_invalid_escape_ptr, t) < 0) {
|
|
+ /* We have not decref u before because first_invalid_escape_ptr points
|
|
inside u. */
|
|
Py_XDECREF(u);
|
|
Py_DECREF(v);
|
|
@@ -138,14 +141,17 @@ decode_unicode_with_escapes(Parser *pars
|
|
static PyObject *
|
|
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
- if (first_invalid_escape != NULL) {
|
|
- if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
|
|
+ if (first_invalid_escape_ptr != NULL) {
|
|
+ if (warn_invalid_escape_sequence(p, *first_invalid_escape_ptr, t) < 0) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|