forked from pool/python314
CVE-2025-4516 (bsc#1243273) blocking DecodeError handling vulnerability, which could lead to DoS. OBS-URL: https://build.opensuse.org/package/show/devel:languages:python:Factory/python314?expand=0&rev=67
492 lines
22 KiB
Diff
492 lines
22 KiB
Diff
From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001
|
|
From: Serhiy Storchaka <storchaka@gmail.com>
|
|
Date: Tue, 4 Feb 2025 11:44:39 +0200
|
|
Subject: [PATCH 1/2] Fix use-after-free in the unicode-escape decoder with
|
|
error handler
|
|
|
|
If the error handler is used, a new bytes object is created to set as
|
|
the object attribute of UnicodeDecodeError, and that bytes object then
|
|
replaces the original data. A pointer to the decoded data will become invalid
|
|
after destroying that temporary bytes object. So we need another way to return
|
|
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
|
|
_PyBytes_DecodeEscape() does not have such an issue, because it does not
|
|
use the error handlers registry, but it should be changed for compatibility
|
|
with _PyUnicode_DecodeUnicodeEscapeInternal().
|
|
---
|
|
Include/internal/pycore_bytesobject.h | 4
|
|
Include/internal/pycore_unicodeobject.h | 8 -
|
|
Lib/test/test_codeccallbacks.py | 39 +++++++
|
|
Lib/test/test_codecs.py | 52 ++++++++--
|
|
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
|
|
Objects/bytesobject.c | 29 ++---
|
|
Objects/unicodeobject.c | 29 ++---
|
|
Parser/string_parser.c | 35 +++---
|
|
8 files changed, 130 insertions(+), 68 deletions(-)
|
|
|
|
Index: Python-3.14.0b1/Include/internal/pycore_bytesobject.h
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Include/internal/pycore_bytesobject.h 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Include/internal/pycore_bytesobject.h 2025-05-16 13:36:00.105684436 +0000
|
|
@@ -20,8 +20,8 @@
|
|
|
|
// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
|
|
// Export for test_peg_generator.
|
|
-PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
|
|
- const char *, const char **);
|
|
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
|
|
+ const char *, int *);
|
|
|
|
|
|
// Substring Search.
|
|
Index: Python-3.14.0b1/Include/internal/pycore_unicodeobject.h
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Include/internal/pycore_unicodeobject.h 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Include/internal/pycore_unicodeobject.h 2025-05-16 13:36:00.105822997 +0000
|
|
@@ -139,14 +139,14 @@
|
|
// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
|
|
// chars.
|
|
// Export for test_peg_generator.
|
|
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
|
|
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
|
|
const char *string, /* Unicode-Escape encoded string */
|
|
Py_ssize_t length, /* size of string */
|
|
const char *errors, /* error handling */
|
|
Py_ssize_t *consumed, /* bytes consumed */
|
|
- const char **first_invalid_escape); /* on return, points to first
|
|
- invalid escaped char in
|
|
- string. */
|
|
+ int *first_invalid_escape); /* on return, if not -1, contain the first
|
|
+ invalid escaped char (<= 0xff) or invalid
|
|
+ octal escape (> 0xff) in string. */
|
|
|
|
/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
|
|
|
|
Index: Python-3.14.0b1/Lib/test/test_codeccallbacks.py
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Lib/test/test_codeccallbacks.py 2025-05-16 13:35:51.879288506 +0000
|
|
+++ Python-3.14.0b1/Lib/test/test_codeccallbacks.py 2025-05-16 13:36:00.105996012 +0000
|
|
@@ -2,6 +2,7 @@
|
|
import codecs
|
|
import html.entities
|
|
import itertools
|
|
+import re
|
|
import sys
|
|
import unicodedata
|
|
import unittest
|
|
@@ -1125,7 +1126,7 @@
|
|
text = 'abc<def>ghi'*n
|
|
text.translate(charmap)
|
|
|
|
- def test_mutatingdecodehandler(self):
|
|
+ def test_mutating_decode_handler(self):
|
|
baddata = [
|
|
("ascii", b"\xff"),
|
|
("utf-7", b"++"),
|
|
@@ -1160,6 +1161,42 @@
|
|
for (encoding, data) in baddata:
|
|
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
|
|
|
+ def test_mutating_decode_handler_unicode_escape(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ def mutating(exc):
|
|
+ if isinstance(exc, UnicodeDecodeError):
|
|
+ r = data.get(exc.object[:exc.end])
|
|
+ if r is not None:
|
|
+ exc.object = r[0] + exc.object[exc.end:]
|
|
+ return ('\u0404', r[1])
|
|
+ raise AssertionError("don't know how to handle %r" % exc)
|
|
+
|
|
+ codecs.register_error('test.mutating2', mutating)
|
|
+ data = {
|
|
+ br'\x0': (b'\\', 0),
|
|
+ br'\x3': (b'xxx\\', 3),
|
|
+ br'\x5': (b'x\\', 1),
|
|
+ }
|
|
+ def check(input, expected, msg):
|
|
+ with self.assertWarns(DeprecationWarning) as cm:
|
|
+ self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
|
|
+ self.assertIn(msg, str(cm.warning))
|
|
+
|
|
+ check(br'\x0n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x0n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
|
|
+ check(br'\x0z', '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+
|
|
+ check(br'\x3n\zr', '\u0404\n\\zr', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x3zr', '\u0404\\zr', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x3z5', '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'"\z" is an invalid escape sequence')
|
|
+
|
|
+ check(br'\x5n\z', '\u0404\n\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(br'\x5n\501', '\u0404\n\u0141', r'"\501" is an invalid octal escape sequence')
|
|
+ check(br'\x5z', '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+ check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'"\z" is an invalid escape sequence')
|
|
+
|
|
# issue32583
|
|
def test_crashing_decode_handler(self):
|
|
# better generating one more character to fill the extra space slot
|
|
Index: Python-3.14.0b1/Lib/test/test_codecs.py
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Lib/test/test_codecs.py 2025-05-16 13:35:51.898372232 +0000
|
|
+++ Python-3.14.0b1/Lib/test/test_codecs.py 2025-05-16 13:36:00.106452551 +0000
|
|
@@ -1196,23 +1196,39 @@
|
|
check(br"[\1010]", b"[A0]")
|
|
check(br"[\x41]", b"[A]")
|
|
check(br"[\x410]", b"[A0]")
|
|
+
|
|
+ def test_warnings(self):
|
|
+ decode = codecs.escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % i):
|
|
check(b"\\" + b, b"\\" + b)
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % (i-32)):
|
|
check(b"\\" + b.upper(), b"\\" + b.upper())
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\8" is an invalid escape sequence'):
|
|
check(br"\8", b"\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", b"\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\\xfa" is an invalid escape sequence') as cm:
|
|
check(b"\\\xfa", b"\\\xfa")
|
|
for i in range(0o400, 0o1000):
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%o" is an invalid octal escape sequence' % i):
|
|
check(rb'\%o' % i, bytes([i & 0o377]))
|
|
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\z" is an invalid escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\501" is an invalid octal escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))
|
|
+
|
|
def test_errors(self):
|
|
decode = codecs.escape_decode
|
|
self.assertRaises(ValueError, decode, br"\x")
|
|
@@ -2661,24 +2677,40 @@
|
|
check(br"[\x410]", "[A0]")
|
|
check(br"\u20ac", "\u20ac")
|
|
check(br"\U0001d120", "\U0001d120")
|
|
+
|
|
+ def test_decode_warnings(self):
|
|
+ decode = codecs.unicode_escape_decode
|
|
+ check = coding_checker(self, decode)
|
|
for i in range(97, 123):
|
|
b = bytes([i])
|
|
if b not in b'abfnrtuvx':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % i):
|
|
check(b"\\" + b, "\\" + chr(i))
|
|
if b.upper() not in b'UN':
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%c" is an invalid escape sequence' % (i-32)):
|
|
check(b"\\" + b.upper(), "\\" + chr(i-32))
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\8" is an invalid escape sequence'):
|
|
check(br"\8", "\\8")
|
|
with self.assertWarns(DeprecationWarning):
|
|
check(br"\9", "\\9")
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\\xfa" is an invalid escape sequence') as cm:
|
|
check(b"\\\xfa", "\\\xfa")
|
|
for i in range(0o400, 0o1000):
|
|
- with self.assertWarns(DeprecationWarning):
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\%o" is an invalid octal escape sequence' % i):
|
|
check(rb'\%o' % i, chr(i))
|
|
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\z" is an invalid escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
|
|
+ with self.assertWarnsRegex(DeprecationWarning,
|
|
+ r'"\\501" is an invalid octal escape sequence'):
|
|
+ self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))
|
|
+
|
|
def test_decode_errors(self):
|
|
decode = codecs.unicode_escape_decode
|
|
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
|
|
Index: Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
|
|
===================================================================
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-16 13:36:00.109748030 +0000
|
|
@@ -0,0 +1,2 @@
|
|
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
|
|
+handler.
|
|
Index: Python-3.14.0b1/Objects/bytesobject.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Objects/bytesobject.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Objects/bytesobject.c 2025-05-16 13:36:00.106976607 +0000
|
|
@@ -1075,10 +1075,10 @@
|
|
}
|
|
|
|
/* Unescape a backslash-escaped string. */
|
|
-PyObject *_PyBytes_DecodeEscape(const char *s,
|
|
+PyObject *_PyBytes_DecodeEscape2(const char *s,
|
|
Py_ssize_t len,
|
|
const char *errors,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape)
|
|
{
|
|
int c;
|
|
char *p;
|
|
@@ -1092,7 +1092,7 @@
|
|
return NULL;
|
|
writer.overallocate = 1;
|
|
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape = -1;
|
|
|
|
end = s + len;
|
|
while (s < end) {
|
|
@@ -1130,9 +1130,8 @@
|
|
c = (c<<3) + *s++ - '0';
|
|
}
|
|
if (c > 0377) {
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape == -1) {
|
|
+ *first_invalid_escape = c;
|
|
}
|
|
}
|
|
*p++ = c;
|
|
@@ -1173,9 +1172,8 @@
|
|
break;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape == -1) {
|
|
+ *first_invalid_escape = (unsigned char)s[-1];
|
|
}
|
|
*p++ = '\\';
|
|
s--;
|
|
@@ -1195,16 +1193,15 @@
|
|
Py_ssize_t Py_UNUSED(unicode),
|
|
const char *Py_UNUSED(recode_encoding))
|
|
{
|
|
- const char* first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
|
|
+ int first_invalid_escape;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
|
|
&first_invalid_escape);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
- unsigned char c = *first_invalid_escape;
|
|
- if ('4' <= c && c <= '7') {
|
|
+ if (first_invalid_escape != -1) {
|
|
+ if (first_invalid_escape > 0xff) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
- "b\"\\%.3s\" is an invalid octal escape sequence. "
|
|
+ "b\"\\%o\" is an invalid octal escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
first_invalid_escape) < 0)
|
|
{
|
|
@@ -1216,7 +1213,7 @@
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"b\"\\%c\" is an invalid escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- c) < 0)
|
|
+ first_invalid_escape) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
Index: Python-3.14.0b1/Objects/unicodeobject.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Objects/unicodeobject.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Objects/unicodeobject.c 2025-05-16 13:36:00.108406304 +0000
|
|
@@ -6621,11 +6621,11 @@
|
|
/* --- Unicode Escape Codec ----------------------------------------------- */
|
|
|
|
PyObject *
|
|
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
|
|
+_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
|
|
Py_ssize_t size,
|
|
const char *errors,
|
|
Py_ssize_t *consumed,
|
|
- const char **first_invalid_escape)
|
|
+ int *first_invalid_escape)
|
|
{
|
|
const char *starts = s;
|
|
_PyUnicodeWriter writer;
|
|
@@ -6635,7 +6635,7 @@
|
|
_PyUnicode_Name_CAPI *ucnhash_capi;
|
|
|
|
// so we can remember if we've seen an invalid escape char or not
|
|
- *first_invalid_escape = NULL;
|
|
+ *first_invalid_escape = -1;
|
|
|
|
if (size == 0) {
|
|
if (consumed) {
|
|
@@ -6723,9 +6723,8 @@
|
|
}
|
|
}
|
|
if (ch > 0377) {
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape == -1) {
|
|
+ *first_invalid_escape = ch;
|
|
}
|
|
}
|
|
WRITE_CHAR(ch);
|
|
@@ -6820,9 +6819,8 @@
|
|
goto error;
|
|
|
|
default:
|
|
- if (*first_invalid_escape == NULL) {
|
|
- *first_invalid_escape = s-1; /* Back up one char, since we've
|
|
- already incremented s. */
|
|
+ if (*first_invalid_escape == -1) {
|
|
+ *first_invalid_escape = c;
|
|
}
|
|
WRITE_ASCII_CHAR('\\');
|
|
WRITE_CHAR(c);
|
|
@@ -6867,17 +6865,16 @@
|
|
const char *errors,
|
|
Py_ssize_t *consumed)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
|
|
+ int first_invalid_escape;
|
|
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
|
|
consumed,
|
|
&first_invalid_escape);
|
|
if (result == NULL)
|
|
return NULL;
|
|
- if (first_invalid_escape != NULL) {
|
|
- unsigned char c = *first_invalid_escape;
|
|
- if ('4' <= c && c <= '7') {
|
|
+ if (first_invalid_escape != -1) {
|
|
+ if (first_invalid_escape > 0xff) {
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
- "\"\\%.3s\" is an invalid octal escape sequence. "
|
|
+ "\"\\%o\" is an invalid octal escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
first_invalid_escape) < 0)
|
|
{
|
|
@@ -6889,7 +6886,7 @@
|
|
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
|
|
"\"\\%c\" is an invalid escape sequence. "
|
|
"Such sequences will not work in the future. ",
|
|
- c) < 0)
|
|
+ first_invalid_escape) < 0)
|
|
{
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
Index: Python-3.14.0b1/Parser/string_parser.c
|
|
===================================================================
|
|
--- Python-3.14.0b1.orig/Parser/string_parser.c 2025-05-06 15:33:52.000000000 +0000
|
|
+++ Python-3.14.0b1/Parser/string_parser.c 2025-05-16 13:43:14.912647780 +0000
|
|
@@ -1,6 +1,6 @@
|
|
#include <Python.h>
|
|
-#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape()
|
|
-#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
|
|
+#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape2()
|
|
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal2()
|
|
|
|
#include "lexer/state.h"
|
|
#include "pegen.h"
|
|
@@ -11,34 +11,34 @@
|
|
//// STRING HANDLING FUNCTIONS ////
|
|
|
|
static int
|
|
-warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
|
|
+warn_invalid_escape_sequence(Parser *p, const char* buffer, int first_invalid_escape, Token *t)
|
|
{
|
|
if (p->call_invalid_rules) {
|
|
// Do not report warnings if we are in the second pass of the parser
|
|
// to avoid showing the warning twice.
|
|
return 0;
|
|
}
|
|
- unsigned char c = (unsigned char)*first_invalid_escape;
|
|
if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END || t->type == TSTRING_MIDDLE || t->type == TSTRING_END)
|
|
- && (c == '{' || c == '}')) {
|
|
+ && (first_invalid_escape == '{' || first_invalid_escape == '}')) {
|
|
// in this case the tokenizer has already emitted a warning,
|
|
// see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
|
|
return 0;
|
|
}
|
|
|
|
- int octal = ('4' <= c && c <= '7');
|
|
+ assert(first_invalid_escape >= 0);
|
|
+ int octal = (first_invalid_escape > 0xff);
|
|
PyObject *msg =
|
|
octal
|
|
? PyUnicode_FromFormat(
|
|
- "\"\\%.3s\" is an invalid octal escape sequence. "
|
|
+ "\"\\%o\" is an invalid octal escape sequence. "
|
|
"Such sequences will not work in the future. "
|
|
- "Did you mean \"\\\\%.3s\"? A raw string is also an option.",
|
|
+ "Did you mean \"\\\\%o\"? A raw string is also an option.",
|
|
first_invalid_escape, first_invalid_escape)
|
|
: PyUnicode_FromFormat(
|
|
"\"\\%c\" is an invalid escape sequence. "
|
|
"Such sequences will not work in the future. "
|
|
"Did you mean \"\\\\%c\"? A raw string is also an option.",
|
|
- c, c);
|
|
+ first_invalid_escape, first_invalid_escape);
|
|
if (msg == NULL) {
|
|
return -1;
|
|
}
|
|
@@ -196,34 +196,31 @@
|
|
len = (size_t)(p - buf);
|
|
s = buf;
|
|
|
|
- const char *first_invalid_escape;
|
|
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape;
|
|
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
|
|
+ Py_XDECREF(u);
|
|
|
|
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
|
// when we are decoding the string but we preserve the line numbers.
|
|
- if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
|
+ if (v != NULL && first_invalid_escape != -1 && t != NULL) {
|
|
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
|
|
- /* We have not decref u before because first_invalid_escape points
|
|
- inside u. */
|
|
- Py_XDECREF(u);
|
|
Py_DECREF(v);
|
|
return NULL;
|
|
}
|
|
}
|
|
- Py_XDECREF(u);
|
|
return v;
|
|
}
|
|
|
|
static PyObject *
|
|
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
|
{
|
|
- const char *first_invalid_escape;
|
|
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
|
|
+ int first_invalid_escape;
|
|
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, &first_invalid_escape);
|
|
if (result == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
- if (first_invalid_escape != NULL) {
|
|
+ if (first_invalid_escape != -1) {
|
|
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
|
|
Py_DECREF(result);
|
|
return NULL;
|