2025-05-17 06:47:24 +00:00
committed by Git OBS Bridge
parent 70a5881572
commit a4883c39a6

View File

@@ -1,49 +1,35 @@
From 3a939ff2298d147459116f98a09549d0f1954039 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Tue, 4 Feb 2025 11:44:39 +0200
Subject: [PATCH 1/2] Fix use-after-free in the unicode-escape decoder with
error handler
If the error handler is used, a new bytes object is created to set as
the object attribute of UnicodeDecodeError, and that bytes object then
replaces the original data. A pointer to the decoded data will become invalid
after destroying that temporary bytes object. So we need another way to return
the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such an issue, because it does not
use the error handlers registry, but it should be changed for compatibility
with _PyUnicode_DecodeUnicodeEscapeInternal().
--- ---
Include/internal/pycore_bytesobject.h | 4 Include/internal/pycore_bytesobject.h | 5
Include/internal/pycore_unicodeobject.h | 8 - Include/internal/pycore_unicodeobject.h | 12 +-
Lib/test/test_codeccallbacks.py | 39 +++++++ Lib/test/test_codeccallbacks.py | 39 +++++++
Lib/test/test_codecs.py | 52 ++++++++-- Lib/test/test_codecs.py | 52 ++++++++--
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2
Objects/bytesobject.c | 29 ++--- Objects/bytesobject.c | 41 ++++---
Objects/unicodeobject.c | 29 ++--- Objects/unicodeobject.c | 46 +++++---
Parser/string_parser.c | 35 +++--- Parser/string_parser.c | 26 +++--
8 files changed, 130 insertions(+), 68 deletions(-) 8 files changed, 160 insertions(+), 63 deletions(-)
Index: Python-3.14.0b1/Include/internal/pycore_bytesobject.h Index: Python-3.14.0b1/Include/internal/pycore_bytesobject.h
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Include/internal/pycore_bytesobject.h 2025-05-06 15:33:52.000000000 +0000 --- Python-3.14.0b1.orig/Include/internal/pycore_bytesobject.h 2025-05-06 15:33:52.000000000 +0000
+++ Python-3.14.0b1/Include/internal/pycore_bytesobject.h 2025-05-16 13:36:00.105684436 +0000 +++ Python-3.14.0b1/Include/internal/pycore_bytesobject.h 2025-05-17 06:44:53.614667081 +0000
@@ -20,8 +20,8 @@ @@ -20,8 +20,9 @@
// Helper for PyBytes_DecodeEscape that detects invalid escape chars. // Helper for PyBytes_DecodeEscape that detects invalid escape chars.
// Export for test_peg_generator. // Export for test_peg_generator.
-PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t, -PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
- const char *, const char **); - const char *, const char **);
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
+ const char *, int *); + const char *,
+ int *, const char **);
// Substring Search. // Substring Search.
Index: Python-3.14.0b1/Include/internal/pycore_unicodeobject.h Index: Python-3.14.0b1/Include/internal/pycore_unicodeobject.h
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Include/internal/pycore_unicodeobject.h 2025-05-06 15:33:52.000000000 +0000 --- Python-3.14.0b1.orig/Include/internal/pycore_unicodeobject.h 2025-05-06 15:33:52.000000000 +0000
+++ Python-3.14.0b1/Include/internal/pycore_unicodeobject.h 2025-05-16 13:36:00.105822997 +0000 +++ Python-3.14.0b1/Include/internal/pycore_unicodeobject.h 2025-05-17 06:44:53.614817134 +0000
@@ -139,14 +139,14 @@ @@ -139,14 +139,18 @@
// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape // Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
// chars. // chars.
// Export for test_peg_generator. // Export for test_peg_generator.
@@ -56,16 +42,20 @@ Index: Python-3.14.0b1/Include/internal/pycore_unicodeobject.h
- const char **first_invalid_escape); /* on return, points to first - const char **first_invalid_escape); /* on return, points to first
- invalid escaped char in - invalid escaped char in
- string. */ - string. */
+ int *first_invalid_escape); /* on return, if not -1, contain the first + int *first_invalid_escape_char, /* on return, if not -1, contain the first
+ invalid escaped char (<= 0xff) or invalid + invalid escaped char (<= 0xff) or invalid
+ octal escape (> 0xff) in string. */ + octal escape (> 0xff) in string. */
+ const char **first_invalid_escape_ptr); /* on return, if not NULL, may
+ point to the first invalid escaped
+ char in string.
+ May be NULL if errors is not NULL. */
/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
Index: Python-3.14.0b1/Lib/test/test_codeccallbacks.py Index: Python-3.14.0b1/Lib/test/test_codeccallbacks.py
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Lib/test/test_codeccallbacks.py 2025-05-16 13:35:51.879288506 +0000 --- Python-3.14.0b1.orig/Lib/test/test_codeccallbacks.py 2025-05-17 06:44:47.673341939 +0000
+++ Python-3.14.0b1/Lib/test/test_codeccallbacks.py 2025-05-16 13:36:00.105996012 +0000 +++ Python-3.14.0b1/Lib/test/test_codeccallbacks.py 2025-05-17 06:44:53.615018793 +0000
@@ -2,6 +2,7 @@ @@ -2,6 +2,7 @@
import codecs import codecs
import html.entities import html.entities
@@ -128,8 +118,8 @@ Index: Python-3.14.0b1/Lib/test/test_codeccallbacks.py
# better generating one more character to fill the extra space slot # better generating one more character to fill the extra space slot
Index: Python-3.14.0b1/Lib/test/test_codecs.py Index: Python-3.14.0b1/Lib/test/test_codecs.py
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Lib/test/test_codecs.py 2025-05-16 13:35:51.898372232 +0000 --- Python-3.14.0b1.orig/Lib/test/test_codecs.py 2025-05-17 06:44:47.696604117 +0000
+++ Python-3.14.0b1/Lib/test/test_codecs.py 2025-05-16 13:36:00.106452551 +0000 +++ Python-3.14.0b1/Lib/test/test_codecs.py 2025-05-17 06:44:53.615449954 +0000
@@ -1196,23 +1196,39 @@ @@ -1196,23 +1196,39 @@
check(br"[\1010]", b"[A0]") check(br"[\1010]", b"[A0]")
check(br"[\x41]", b"[A]") check(br"[\x41]", b"[A]")
@@ -224,15 +214,15 @@ Index: Python-3.14.0b1/Lib/test/test_codecs.py
Index: Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst Index: Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
=================================================================== ===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-16 13:36:00.109748030 +0000 +++ Python-3.14.0b1/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst 2025-05-17 06:44:53.615887918 +0000
@@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
+Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error +Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
+handler. +handler.
Index: Python-3.14.0b1/Objects/bytesobject.c Index: Python-3.14.0b1/Objects/bytesobject.c
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Objects/bytesobject.c 2025-05-06 15:33:52.000000000 +0000 --- Python-3.14.0b1.orig/Objects/bytesobject.c 2025-05-06 15:33:52.000000000 +0000
+++ Python-3.14.0b1/Objects/bytesobject.c 2025-05-16 13:36:00.106976607 +0000 +++ Python-3.14.0b1/Objects/bytesobject.c 2025-05-17 06:44:53.616354335 +0000
@@ -1075,10 +1075,10 @@ @@ -1075,10 +1075,11 @@
} }
/* Unescape a backslash-escaped string. */ /* Unescape a backslash-escaped string. */
@@ -241,79 +231,91 @@ Index: Python-3.14.0b1/Objects/bytesobject.c
Py_ssize_t len, Py_ssize_t len,
const char *errors, const char *errors,
- const char **first_invalid_escape) - const char **first_invalid_escape)
+ int *first_invalid_escape) + int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{ {
int c; int c;
char *p; char *p;
@@ -1092,7 +1092,7 @@ @@ -1092,7 +1093,8 @@
return NULL; return NULL;
writer.overallocate = 1; writer.overallocate = 1;
- *first_invalid_escape = NULL; - *first_invalid_escape = NULL;
+ *first_invalid_escape = -1; + *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
end = s + len; end = s + len;
while (s < end) { while (s < end) {
@@ -1130,9 +1130,8 @@ @@ -1130,9 +1132,10 @@
c = (c<<3) + *s++ - '0'; c = (c<<3) + *s++ - '0';
} }
if (c > 0377) { if (c > 0377) {
- if (*first_invalid_escape == NULL) { - if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've - *first_invalid_escape = s-3; /* Back up 3 chars, since we've
- already incremented s. */ - already incremented s. */
+ if (*first_invalid_escape == -1) { + if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape = c; + *first_invalid_escape_char = c;
+ /* Back up 3 chars, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 3;
} }
} }
*p++ = c; *p++ = c;
@@ -1173,9 +1172,8 @@ @@ -1173,9 +1176,10 @@
break; break;
default: default:
- if (*first_invalid_escape == NULL) { - if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've - *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */ - already incremented s. */
+ if (*first_invalid_escape == -1) { + if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape = (unsigned char)s[-1]; + *first_invalid_escape_char = (unsigned char)s[-1];
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
} }
*p++ = '\\'; *p++ = '\\';
s--; s--;
@@ -1195,16 +1193,15 @@ @@ -1195,18 +1199,19 @@
Py_ssize_t Py_UNUSED(unicode), Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding)) const char *Py_UNUSED(recode_encoding))
{ {
- const char* first_invalid_escape; - const char* first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, errors, - PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
+ int first_invalid_escape; - &first_invalid_escape);
+ int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, + PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
&first_invalid_escape); + &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL) if (result == NULL)
return NULL; return NULL;
- if (first_invalid_escape != NULL) { - if (first_invalid_escape != NULL) {
- unsigned char c = *first_invalid_escape; - unsigned char c = *first_invalid_escape;
- if ('4' <= c && c <= '7') { - if ('4' <= c && c <= '7') {
+ if (first_invalid_escape != -1) { + if (first_invalid_escape_char != -1) {
+ if (first_invalid_escape > 0xff) { + if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "b\"\\%.3s\" is an invalid octal escape sequence. " - "b\"\\%.3s\" is an invalid octal escape sequence. "
+ "b\"\\%o\" is an invalid octal escape sequence. " + "b\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. ", "Such sequences will not work in the future. ",
first_invalid_escape) < 0) - first_invalid_escape) < 0)
+ first_invalid_escape_char) < 0)
{ {
@@ -1216,7 +1213,7 @@ Py_DECREF(result);
return NULL;
@@ -1216,7 +1221,7 @@
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"b\"\\%c\" is an invalid escape sequence. " "b\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. ", "Such sequences will not work in the future. ",
- c) < 0) - c) < 0)
+ first_invalid_escape) < 0) + first_invalid_escape_char) < 0)
{ {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
Index: Python-3.14.0b1/Objects/unicodeobject.c Index: Python-3.14.0b1/Objects/unicodeobject.c
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Objects/unicodeobject.c 2025-05-06 15:33:52.000000000 +0000 --- Python-3.14.0b1.orig/Objects/unicodeobject.c 2025-05-06 15:33:52.000000000 +0000
+++ Python-3.14.0b1/Objects/unicodeobject.c 2025-05-16 13:36:00.108406304 +0000 +++ Python-3.14.0b1/Objects/unicodeobject.c 2025-05-17 06:44:53.617547540 +0000
@@ -6621,11 +6621,11 @@ @@ -6621,13 +6621,15 @@
/* --- Unicode Escape Codec ----------------------------------------------- */ /* --- Unicode Escape Codec ----------------------------------------------- */
PyObject * PyObject *
@@ -323,169 +325,142 @@ Index: Python-3.14.0b1/Objects/unicodeobject.c
const char *errors, const char *errors,
Py_ssize_t *consumed, Py_ssize_t *consumed,
- const char **first_invalid_escape) - const char **first_invalid_escape)
+ int *first_invalid_escape) + int *first_invalid_escape_char,
+ const char **first_invalid_escape_ptr)
{ {
const char *starts = s; const char *starts = s;
+ const char *initial_starts = starts;
_PyUnicodeWriter writer; _PyUnicodeWriter writer;
@@ -6635,7 +6635,7 @@ const char *end;
PyObject *errorHandler = NULL;
@@ -6635,7 +6637,8 @@
_PyUnicode_Name_CAPI *ucnhash_capi; _PyUnicode_Name_CAPI *ucnhash_capi;
// so we can remember if we've seen an invalid escape char or not // so we can remember if we've seen an invalid escape char or not
- *first_invalid_escape = NULL; - *first_invalid_escape = NULL;
+ *first_invalid_escape = -1; + *first_invalid_escape_char = -1;
+ *first_invalid_escape_ptr = NULL;
if (size == 0) { if (size == 0) {
if (consumed) { if (consumed) {
@@ -6723,9 +6723,8 @@ @@ -6723,9 +6726,12 @@
} }
} }
if (ch > 0377) { if (ch > 0377) {
- if (*first_invalid_escape == NULL) { - if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-3; /* Back up 3 chars, since we've - *first_invalid_escape = s-3; /* Back up 3 chars, since we've
- already incremented s. */ - already incremented s. */
+ if (*first_invalid_escape == -1) { + if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape = ch; + *first_invalid_escape_char = ch;
+ if (starts == initial_starts) {
+ /* Back up 3 chars, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 3;
+ }
} }
} }
WRITE_CHAR(ch); WRITE_CHAR(ch);
@@ -6820,9 +6819,8 @@ @@ -6820,9 +6826,12 @@
goto error; goto error;
default: default:
- if (*first_invalid_escape == NULL) { - if (*first_invalid_escape == NULL) {
- *first_invalid_escape = s-1; /* Back up one char, since we've - *first_invalid_escape = s-1; /* Back up one char, since we've
- already incremented s. */ - already incremented s. */
+ if (*first_invalid_escape == -1) { + if (*first_invalid_escape_char == -1) {
+ *first_invalid_escape = c; + *first_invalid_escape_char = c;
+ if (starts == initial_starts) {
+ /* Back up one char, since we've already incremented s. */
+ *first_invalid_escape_ptr = s - 1;
+ }
} }
WRITE_ASCII_CHAR('\\'); WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c); WRITE_CHAR(c);
@@ -6867,17 +6865,16 @@ @@ -6867,19 +6876,20 @@
const char *errors, const char *errors,
Py_ssize_t *consumed) Py_ssize_t *consumed)
{ {
- const char *first_invalid_escape; - const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ int first_invalid_escape; + int first_invalid_escape_char;
+ const char *first_invalid_escape_ptr;
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors,
consumed, consumed,
&first_invalid_escape); - &first_invalid_escape);
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL) if (result == NULL)
return NULL; return NULL;
- if (first_invalid_escape != NULL) { - if (first_invalid_escape != NULL) {
- unsigned char c = *first_invalid_escape; - unsigned char c = *first_invalid_escape;
- if ('4' <= c && c <= '7') { - if ('4' <= c && c <= '7') {
+ if (first_invalid_escape != -1) { + if (first_invalid_escape_char != -1) {
+ if (first_invalid_escape > 0xff) { + if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "\"\\%.3s\" is an invalid octal escape sequence. " - "\"\\%.3s\" is an invalid octal escape sequence. "
+ "\"\\%o\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. ", "Such sequences will not work in the future. ",
first_invalid_escape) < 0) - first_invalid_escape) < 0)
+ first_invalid_escape_char) < 0)
{ {
@@ -6889,7 +6886,7 @@ Py_DECREF(result);
return NULL;
@@ -6889,7 +6899,7 @@
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"\"\\%c\" is an invalid escape sequence. " "\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. ", "Such sequences will not work in the future. ",
- c) < 0) - c) < 0)
+ first_invalid_escape) < 0) + first_invalid_escape_char) < 0)
{ {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
Index: Python-3.14.0b1/Parser/string_parser.c Index: Python-3.14.0b1/Parser/string_parser.c
=================================================================== ===================================================================
--- Python-3.14.0b1.orig/Parser/string_parser.c 2025-05-06 15:33:52.000000000 +0000 --- Python-3.14.0b1.orig/Parser/string_parser.c 2025-05-06 15:33:52.000000000 +0000
+++ Python-3.14.0b1/Parser/string_parser.c 2025-05-16 13:43:14.912647780 +0000 +++ Python-3.14.0b1/Parser/string_parser.c 2025-05-17 06:44:53.618734552 +0000
@@ -1,6 +1,6 @@ @@ -196,15 +196,18 @@
#include <Python.h>
-#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape()
-#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
+#include "pycore_bytesobject.h" // _PyBytes_DecodeEscape2()
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal2()
#include "lexer/state.h"
#include "pegen.h"
@@ -11,34 +11,34 @@
//// STRING HANDLING FUNCTIONS ////
static int
-warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
+warn_invalid_escape_sequence(Parser *p, const char* buffer, int first_invalid_escape, Token *t)
{
if (p->call_invalid_rules) {
// Do not report warnings if we are in the second pass of the parser
// to avoid showing the warning twice.
return 0;
}
- unsigned char c = (unsigned char)*first_invalid_escape;
if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END || t->type == TSTRING_MIDDLE || t->type == TSTRING_END)
- && (c == '{' || c == '}')) {
+ && (first_invalid_escape == '{' || first_invalid_escape == '}')) {
// in this case the tokenizer has already emitted a warning,
// see Parser/tokenizer/helpers.c:warn_invalid_escape_sequence
return 0;
}
- int octal = ('4' <= c && c <= '7');
+ assert(first_invalid_escape >= 0);
+ int octal = (first_invalid_escape > 0xff);
PyObject *msg =
octal
? PyUnicode_FromFormat(
- "\"\\%.3s\" is an invalid octal escape sequence. "
+ "\"\\%o\" is an invalid octal escape sequence. "
"Such sequences will not work in the future. "
- "Did you mean \"\\\\%.3s\"? A raw string is also an option.",
+ "Did you mean \"\\\\%o\"? A raw string is also an option.",
first_invalid_escape, first_invalid_escape)
: PyUnicode_FromFormat(
"\"\\%c\" is an invalid escape sequence. "
"Such sequences will not work in the future. "
"Did you mean \"\\\\%c\"? A raw string is also an option.",
- c, c);
+ first_invalid_escape, first_invalid_escape);
if (msg == NULL) {
return -1;
}
@@ -196,34 +196,31 @@
len = (size_t)(p - buf); len = (size_t)(p - buf);
s = buf; s = buf;
- const char *first_invalid_escape; - const char *first_invalid_escape;
- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape); - v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape);
+ int first_invalid_escape; + int first_invalid_escape_char;
+ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape); + const char *first_invalid_escape_ptr;
+ Py_XDECREF(u); + v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
// HACK: later we can simply pass the line no, since we don't preserve the tokens // HACK: later we can simply pass the line no, since we don't preserve the tokens
// when we are decoding the string but we preserve the line numbers. // when we are decoding the string but we preserve the line numbers.
- if (v != NULL && first_invalid_escape != NULL && t != NULL) { - if (v != NULL && first_invalid_escape != NULL && t != NULL) {
+ if (v != NULL && first_invalid_escape != -1 && t != NULL) { - if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
- /* We have not decref u before because first_invalid_escape points - /* We have not decref u before because first_invalid_escape points
- inside u. */ - inside u. */
- Py_XDECREF(u); + if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) {
+ if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) {
+ /* We have not decref u before because first_invalid_escape_ptr
+ points inside u. */
Py_XDECREF(u);
Py_DECREF(v); Py_DECREF(v);
return NULL; return NULL;
} @@ -217,14 +220,17 @@
}
- Py_XDECREF(u);
return v;
}
static PyObject * static PyObject *
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
{ {
- const char *first_invalid_escape; - const char *first_invalid_escape;
- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); - PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
+ int first_invalid_escape; + int first_invalid_escape_char;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, &first_invalid_escape); + const char *first_invalid_escape_ptr;
+ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
+ &first_invalid_escape_char,
+ &first_invalid_escape_ptr);
if (result == NULL) { if (result == NULL) {
return NULL; return NULL;
} }
- if (first_invalid_escape != NULL) { - if (first_invalid_escape != NULL) {
+ if (first_invalid_escape != -1) { - if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { + if (first_invalid_escape_ptr != NULL) {
+ if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) {
Py_DECREF(result); Py_DECREF(result);
return NULL; return NULL;
}