forked from pool/python314
- Library
- gh-137426: Remove the code deprecation of
importlib.abc.ResourceLoader. It is documented as
deprecated, but left for backwards compatibility with other
classes in importlib.abc.
- gh-137282: Fix tab completion and dir() on
concurrent.futures.
- gh-137257: Bump the version of pip bundled in ensurepip to
version 25.2
- gh-137226: Fix behavior of
annotationlib.ForwardRef.evaluate() when the type_params
parameter is passed and the name of a type param is also
present in an enclosing scope.
- gh-130522: Fix unraisable TypeError raised during
interpreter shutdown in the threading module.
- gh-137059: Fix handling of file URLs with a
Windows drive letter in the URL authority by
urllib.request.url2pathname(). This fixes a regression in
earlier pre-releases of Python 3.14.
- gh-130577: tarfile now validates archives to ensure member
offsets are non-negative. (Contributed by Alexander Enrique
Urieles Nieto in gh-130577; CVE-2025-8194, bsc#1247249).
- gh-135228: When dataclasses replaces a class with a slotted
dataclass, the original class can now be garbage collected
again. Earlier changes in Python 3.14 caused this class to
always remain in existence together with the replacement
class synthesized by dataclasses.
- Documentation
- gh-136155: We are now checking for fatal errors in EPUB
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python314?expand=0&rev=91
481 lines
23 KiB
Diff
481 lines
23 KiB
Diff
From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001
|
|
From: Serhiy Storchaka <storchaka@gmail.com>
|
|
Date: Tue, 4 Feb 2025 11:44:39 +0200
|
|
Subject: Fix use-after-free in the unicode-escape decoder with error handler
|
|
|
|
If the error handler is used, a new bytes object is created to set as
|
|
the object attribute of UnicodeDecodeError, and that bytes object then
|
|
replaces the original data. A pointer to the decoded data will become invalid
|
|
after destroying that temporary bytes object. So we need another way to return
|
|
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
|
|
_PyBytes_DecodeEscape() does not have such an issue, because it does not
|
|
use the error handlers registry, but it should be changed for compatibility
|
|
with _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
---
|
|
Include/internal/pycore_bytesobject.h | 5
|
|
Include/internal/pycore_unicodeobject.h | 12 +-
|
|
Lib/test/test_codeccallbacks.py | 39 +++++++
|
|
Lib/test/test_codecs.py | 52 ++++++++--
|
|
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
|
|
Objects/bytesobject.c | 41 ++++---
|
|
Objects/unicodeobject.c | 46 +++++---
|
|
Parser/string_parser.c | 26 +++--
|
|
8 files changed, 160 insertions(+), 63 deletions(-)
|
|
|
|
Index: Python-3.14.0b1/Include/internal/pycore_bytesobject.h
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Include/internal/pycore_bytesobject.h 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Include/internal/pycore_bytesobject.h 2025-05-17 06:44:53.614667081 +0000
|
|
@@ -20,8 +20,9 @@
|
|
|
|
// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
|
|
// Export for test_peg_generator.
|
|
-PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
|
- const char *, const char **);
|
|
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
|
|
+ const char *,
|
|
+ int *, const char **);
|
|
|
|
|
|
// Substring Search.
|
|
Index: Python-3.14.0b1/Include/internal/pycore_unicodeobject.h
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Include/internal/pycore_unicodeobject.h 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Include/internal/pycore_unicodeobject.h 2025-05-17 06:44:53.614817134 +0000
|
|
@@ -139,14 +139,18 @@
|
|
// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
|
|
// chars.
|
|
// Export for test_peg_generator.
|
|
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
|
|
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
|
|
const char *string, /* Unicode-Escape encoded string */
|
|
Py_ssize_t length, /* size of string */
|
|
const char *errors, /* error handling */
|
|
Py_ssize_t *consumed, /* bytes consumed */
|
|
- const char **first_invalid_escape); /* on return, points to first
|
|
- invalid escaped char in
|
|
- string. */
|
|
+ int *first_invalid_escape_char, /* on return, if not -1, contain the first
|
|
+ invalid escaped char (<= 0xff) or invalid
|
|
+ octal escape (> 0xff) in string. */
|
|
+ const char **first_invalid_escape_ptr); /* on return, if not NULL, may
|
|
+ point to the first invalid escaped
|
|
+ char in string.
|
|
+ May be NULL if errors is not NULL. */
|
|
|
|
/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
|
|
|
|
Index: Python-3.14.0b1/Lib/test/test_codeccallbacks.py
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Lib/test/test_codeccallbacks.py 2025-05-17 06:44:47.673341939 +0000
|
|
+++ Python-3.14.0b1/Lib/test/test_codeccallbacks.py 2025-05-17 06:44:53.615018793 +0000
|
|
@@ -2,6 +2,7 @@
|
|
import codecs
|
|
import html.entities
|
|
import itertools
|
|
+import re
|
|
import sys
|
|
import unicodedata
|
|
import unittest
|
|
@@ -1125,7 +1126,7 @@
|
|
text = 'abc<def>ghi'*n
|
|
text.translate(charmap)
|
|
|
|
- def test_mutatingdecodehandler(self):
|
|
+ def test_mutating_decode_handler(self):
|
|
baddata = [
|
|
("ascii", b"\xff"),
|
|
("utf-7", b"++"),
|
|
@@ -1160,6 +1161,42 @@
|
|
for (encoding, data) in baddata:
|
|
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
|
|
|
+ def test_mutating_decode_handler_unicode_escape(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ def mutating(exc):
|
|
+ if isinstance(exc, UnicodeDecodeError):
|
|
+ r = data.get(exc.object[:exc.end])
|
|
+ if r is not None:
|
|
+ exc.object = r[0] + exc.object[exc.end:]
|
|
+ return ('\u0404', r[1])
|
|
+ raise AssertionError("don't know how to handle %r" % exc)
|
|
+
|
|
+ codecs.register_error('test.mutating2', mutating)
|
|
+ data = {
|
|
+ br'\x0': (b'\\', 0),
|
|
+ br'\x3': (b'xxx\\', 3),
|
|
+ br'\x5': (b'x\\', 1),
|
|
+ }
|
|
+ def check(input, expected, msg):
|
|
+ with self.assertWarns(DeprecationWarning) as cm:
|
|
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
|
|
+ self.assertIn(msg, str(cm.warning))
|
|
+
|
|
+ check(br'\x0n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x0n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
|
|
+ check(br'\x0z', '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+
|
|
+ check(br'\x3n\zr', '\u0404\n\\zr', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x3zr', '\u0404\\zr', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x3z5', '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+
|
|
+ check(br'\x5n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x5n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
|
|
+ check(br'\x5z', '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+
|
|
# issue32583
|
|
def test_crashing_decode_handler(self):
|
|
# better generating one more character to fill the extra space slot
|
|
Index: Python-3.14.0b1/Lib/test/test_codecs.py
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Lib/test/test_codecs.py 2025-05-17 06:44:47.696604117 +0000
|
|
+++ Python-3.14.0b1/Lib/test/test_codecs.py 2025-05-17 06:44:53.615449954 +0000
|
|
@@ -1196,23 +1196,39 @@
|
|
check(br"[\1010]", b"[A0]")
|
|
check(br"[\x41]", b"[A]")
|
|
check(br"[\x410]", b"[A0]")
|
|
+
|
|
+ def test_warnings(self):
|
|
+ decode = codecs.escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % i):
|
|
check(b"\\" + b, b"\\" + b)
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % (i-32)):
|
|
check(b"\\" + b.upper(), b"\\" + b.upper())
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\8" is an invalid escape sequence'):
|
|
check(br"\8", b"\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", b"\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\\xfa" is an invalid escape sequence') as cm:
|
|
check(b"\\\xfa", b"\\\xfa")
|
|
for i in range(0o400, 0o1000):
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%o" is an invalid octal escape sequence' % i):
|
|
check(rb'\%o' % i, bytes([i & 0o377]))
|
|
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\z" is an invalid escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\501" is an invalid octal escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
|
|
+
|
|
def test_errors(self):
|
|
decode = codecs.escape_decode
|
|
self.assertRaises(ValueError, decode, br"\x")
|
|
@@ -2661,24 +2677,40 @@
|
|
check(br"[\x410]", "[A0]")
|
|
check(br"\u20ac", "\u20ac")
|
|
check(br"\U0001d120", "\U0001d120")
|
|
+
|
|
+ def test_decode_warnings(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtuvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % i):
|
|
check(b"\\" + b, "\\" + chr(i))
|
|
if b.upper() not in b'UN':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % (i-32)):
|
|
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\8" is an invalid escape sequence'):
|
|
check(br"\8", "\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", "\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\\xfa" is an invalid escape sequence') as cm:
|
|
check(b"\\\xfa", "\\\xfa")
|
|
for i in range(0o400, 0o1000):
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%o" is an invalid octal escape sequence' % i):
|
|
check(rb'\%o' % i, chr(i))
|
|
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\z" is an invalid escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\501" is an invalid octal escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
|
|
+
|
|
def test_decode_errors(self):
|
|
decode = codecs.unicode_escape_decode
|
|
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
|
|
Index: Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
|
|
===================================================================
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-17 06:44:53.615887918 +0000
|
|
@@ -0,0 +1,2 @@
|
|
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
|
|
+handler.
|
|
Index: Python-3.14.0b1/Objects/bytesobject.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Objects/bytesobject.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Objects/bytesobject.c 2025-05-17 06:44:53.616354335 +0000
|
|
@@ -1075,10 +1075,11 @@
|
|
}
|
|
|
|
/* Unescape a backslash-escaped string. */
|
|
-PyObject *_PyBytes_DecodeEscape(const char *s,
|
|
+PyObject *_PyBytes_DecodeEscape2(const char *s,
|
|
Py_ssize_t len,
|
|
const char *errors,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape_char,
|
|
+ const char **first_invalid_escape_ptr)
|
|
{
|
|
int c;
|
|
char *p;
|
|
@@ -1092,7 +1093,8 @@
|
|
return NULL;
|
|
writer.overallocate = 1;
|
|
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape_char = -1;
|
|
+ *first_invalid_escape_ptr = NULL;
|
|
|
|
end = s + len;
|
|
while (s < end) {
|
|
@@ -1130,9 +1132,10 @@
|
|
c = (c<<3) + *s++ - '0';
|
|
}
|
|
if (c > 0377) {
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = c;
|
|
+ /* Back up 3 chars, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 3;
|
|
}
|
|
}
|
|
*p++ = c;
|
|
@@ -1173,9 +1176,10 @@
|
|
break;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = (unsigned char)s[-1];
|
|
+ /* Back up one char, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 1;
|
|
}
|
|
*p++ = '\\';
|
|
s--;
|
|
@@ -1195,18 +1199,19 @@
|
|
Py_ssize_t Py_UNUSED(unicode),
|
|
const char *Py_UNUSED(recode_encoding))
|
|
{
|
|
- const char* first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
|
|
- &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
- unsigned char c = *first_invalid_escape;
|
|
- if ('4' <= c && c <= '7') {
|
|
+ if (first_invalid_escape_char != -1) {
|
|
+ if (first_invalid_escape_char > 0xff) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
- "b\"\\%.3s\" is an invalid octal escape sequence. "
|
|
+ "b\"\\%o\" is an invalid octal escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- first_invalid_escape) < 0)
|
|
+ first_invalid_escape_char) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
@@ -1216,7 +1221,7 @@
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"b\"\\%c\" is an invalid escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- c) < 0)
|
|
+ first_invalid_escape_char) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
Index: Python-3.14.0b1/Objects/unicodeobject.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Objects/unicodeobject.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Objects/unicodeobject.c 2025-05-17 06:44:53.617547540 +0000
|
|
@@ -6621,13 +6621,15 @@
|
|
/* --- Unicode Escape Codec ----------------------------------------------- */
|
|
|
|
PyObject *
|
|
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
|
|
Py_ssize_t size,
|
|
const char *errors,
|
|
Py_ssize_t *consumed,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape_char,
|
|
+ const char **first_invalid_escape_ptr)
|
|
{
|
|
const char *starts = s;
|
|
+ const char *initial_starts = starts;
|
|
_PyUnicodeWriter writer;
|
|
const char *end;
|
|
PyObject *errorHandler = NULL;
|
|
@@ -6635,7 +6637,8 @@
|
|
_PyUnicode_Name_CAPI *ucnhash_capi;
|
|
|
|
// so we can remember if we've seen an invalid escape char or not
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape_char = -1;
|
|
+ *first_invalid_escape_ptr = NULL;
|
|
|
|
if (size == 0) {
|
|
if (consumed) {
|
|
@@ -6723,9 +6726,12 @@
|
|
}
|
|
}
|
|
if (ch > 0377) {
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = ch;
|
|
+ if (starts == initial_starts) {
|
|
+ /* Back up 3 chars, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 3;
|
|
+ }
|
|
}
|
|
}
|
|
WRITE_CHAR(ch);
|
|
@@ -6820,9 +6826,12 @@
|
|
goto error;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape_char == -1) {
|
|
+ *first_invalid_escape_char = c;
|
|
+ if (starts == initial_starts) {
|
|
+ /* Back up one char, since we've already incremented s. */
|
|
+ *first_invalid_escape_ptr = s - 1;
|
|
+ }
|
|
}
|
|
WRITE_ASCII_CHAR('\\');
|
|
WRITE_CHAR(c);
|
|
@@ -6867,19 +6876,20 @@
|
|
const char *errors,
|
|
Py_ssize_t *consumed)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
|
|
consumed,
|
|
- &first_invalid_escape);
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
- unsigned char c = *first_invalid_escape;
|
|
- if ('4' <= c && c <= '7') {
|
|
+ if (first_invalid_escape_char != -1) {
|
|
+ if (first_invalid_escape_char > 0xff) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
- "\"\\%.3s\" is an invalid octal escape sequence. "
|
|
+ "\"\\%o\" is an invalid octal escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- first_invalid_escape) < 0)
|
|
+ first_invalid_escape_char) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
@@ -6889,7 +6899,7 @@
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"\"\\%c\" is an invalid escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- c) < 0)
|
|
+ first_invalid_escape_char) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
Index: Python-3.14.0b1/Parser/string_parser.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Parser/string_parser.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Parser/string_parser.c 2025-05-17 06:44:53.618734552 +0000
|
|
@@ -196,15 +196,18 @@
|
|
len = (size_t)(p - buf);
|
|
s = buf;
|
|
|
|
- const char *first_invalid_escape;
|
|
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
|
|
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
|
// when we are decoding the string but we preserve the line numbers.
|
|
- if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
|
- if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
|
|
- /* We have not decref u before because first_invalid_escape points
|
|
- inside u. */
|
|
+ if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) {
|
|
+ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) {
|
|
+ /* We have not decref u before because first_invalid_escape_ptr
|
|
+ points inside u. */
|
|
Py_XDECREF(u);
|
|
Py_DECREF(v);
|
|
return NULL;
|
|
@@ -217,14 +220,17 @@
|
|
static PyObject *
|
|
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape_char;
|
|
+ const char *first_invalid_escape_ptr;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
|
|
+ &first_invalid_escape_char,
|
|
+ &first_invalid_escape_ptr);
|
|
if (result == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
- if (first_invalid_escape != NULL) {
|
|
- if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
|
|
+ if (first_invalid_escape_ptr != NULL) {
|
|
+ if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|