14
0

Accepting request 904606 from home:mcalabkova:branches:devel:languages:python

- Add upstream patch unicode.patch to fix random python crash

OBS-URL: https://build.opensuse.org/request/show/904606
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-pybind11?expand=0&rev=27
This commit is contained in:
2021-07-08 10:06:06 +00:00
committed by Git OBS Bridge
parent 1cdf4c9984
commit 37c1ce1f87
3 changed files with 233 additions and 1 deletions

View File

@@ -1,3 +1,8 @@
-------------------------------------------------------------------
Wed Jul 7 14:30:20 UTC 2021 - Markéta Machová <mmachova@suse.com>
- Add upstream patch unicode.patch to fix random python crash
-------------------------------------------------------------------
Mon Feb 15 00:56:32 UTC 2021 - Benjamin Greiner <code@bnavigator.de>

View File

@@ -24,14 +24,16 @@ Summary: Module for operability between C++11 and Python
License: BSD-3-Clause
URL: https://github.com/pybind/pybind11
Source: https://github.com/pybind/pybind11/archive/v%{version}.tar.gz#/pybind11-%{version}.tar.gz
# PATCH-FIX-UPSTREAM https://github.com/pybind/pybind11/commit/0c93a0f3fcf6bf26be584558d7426564720cea6f Fix Unicode support for ostream redirects
Patch0: unicode.patch
BuildRequires: %{python_module devel}
BuildRequires: %{python_module numpy if (%python-base without python36-base)}
BuildRequires: %{python_module pytest}
BuildRequires: %{python_module setuptools}
BuildRequires: cmake
BuildRequires: fdupes
BuildRequires: gcc-c++
BuildRequires: python-rpm-macros
BuildRequires: %{python_module numpy if (%python-base without python36-base)}
BuildArch: noarch
%python_subpackages
@@ -59,6 +61,7 @@ This package contains files for developing applications using pybind11.
%prep
%setup -q -n pybind11-%{version}
%autopatch -p1
%build
%python_build

224
unicode.patch Normal file
View File

@@ -0,0 +1,224 @@
From 0c93a0f3fcf6bf26be584558d7426564720cea6f Mon Sep 17 00:00:00 2001
From: Pieter P <pieter.pas@outlook.com>
Date: Tue, 4 May 2021 07:04:38 +0200
Subject: [PATCH] Fix Unicode support for ostream redirects (#2982)
* Crash when printing Unicode to redirected cout
Add failing tests
* Fix Unicode crashes redirected cout
* pythonbuf::utf8_remainder check end iterator
* Remove trailing whitespace and formatting iostream
* Avoid buffer overflow if ostream redirect races
This doesn't solve the actual race, but at least it now has a much lower
probability of reading past the end of the buffer even when data races
do occur.
---
include/pybind11/iostream.h | 75 ++++++++++++++++++++++++++-----
tests/test_iostream.py | 90 +++++++++++++++++++++++++++++++++++++
2 files changed, 153 insertions(+), 12 deletions(-)
diff --git a/include/pybind11/iostream.h b/include/pybind11/iostream.h
index 9dee755431..7c1c718b02 100644
--- a/include/pybind11/iostream.h
+++ b/include/pybind11/iostream.h
@@ -16,6 +16,9 @@
#include <string>
#include <memory>
#include <iostream>
+#include <cstring>
+#include <iterator>
+#include <algorithm>
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)
@@ -38,25 +41,73 @@ class pythonbuf : public std::streambuf {
return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof();
}
+ // Computes how many bytes at the end of the buffer are part of an
+ // incomplete sequence of UTF-8 bytes.
+ // Precondition: pbase() < pptr()
+ size_t utf8_remainder() const {
+ const auto rbase = std::reverse_iterator<char *>(pbase());
+ const auto rpptr = std::reverse_iterator<char *>(pptr());
+ auto is_ascii = [](char c) {
+ return (static_cast<unsigned char>(c) & 0x80) == 0x00;
+ };
+ auto is_leading = [](char c) {
+ return (static_cast<unsigned char>(c) & 0xC0) == 0xC0;
+ };
+ auto is_leading_2b = [](char c) {
+ return static_cast<unsigned char>(c) <= 0xDF;
+ };
+ auto is_leading_3b = [](char c) {
+ return static_cast<unsigned char>(c) <= 0xEF;
+ };
+ // If the last character is ASCII, there are no incomplete code points
+ if (is_ascii(*rpptr))
+ return 0;
+ // Otherwise, work back from the end of the buffer and find the first
+ // UTF-8 leading byte
+ const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
+ const auto leading = std::find_if(rpptr, rpend, is_leading);
+ if (leading == rbase)
+ return 0;
+ const auto dist = static_cast<size_t>(leading - rpptr);
+ size_t remainder = 0;
+
+ if (dist == 0)
+ remainder = 1; // a lone leading byte is never a complete code point
+ else if (dist == 1)
+ remainder = is_leading_2b(*leading) ? 0 : dist + 1;
+ else if (dist == 2)
+ remainder = is_leading_3b(*leading) ? 0 : dist + 1;
+ // else if (dist >= 3), at least 4 bytes before encountering a UTF-8
+ // leading byte, either no remainder or invalid UTF-8.
+ // Invalid UTF-8 will cause an exception later when converting
+ // to a Python string, so that's not handled here.
+ return remainder;
+ }
+
// This function must be non-virtual to be called in a destructor. If the
// rare MSVC test failure shows up with this version, then this should be
// simplified to a fully qualified call.
int _sync() {
- if (pbase() != pptr()) {
-
- {
- gil_scoped_acquire tmp;
-
+ if (pbase() != pptr()) { // If buffer is not empty
+ gil_scoped_acquire tmp;
+ // Placed inside gil_scoped_acquire as a mutex to avoid a race.
+ if (pbase() != pptr()) { // Check again under the lock
// This subtraction cannot be negative, so dropping the sign.
- str line(pbase(), static_cast<size_t>(pptr() - pbase()));
-
- pywrite(line);
- pyflush();
-
- // Placed inside gil_scoped_aquire as a mutex to avoid a race
+ auto size = static_cast<size_t>(pptr() - pbase());
+ size_t remainder = utf8_remainder();
+
+ if (size > remainder) {
+ str line(pbase(), size - remainder);
+ pywrite(line);
+ pyflush();
+ }
+
+ // Copy the remainder at the end of the buffer to the beginning:
+ if (remainder > 0)
+ std::memmove(pbase(), pptr() - remainder, remainder);
setp(pbase(), epptr());
+ pbump(static_cast<int>(remainder));
}
-
}
return 0;
}
diff --git a/tests/test_iostream.py b/tests/test_iostream.py
index 6d493beda3..e2b74d01cb 100644
--- a/tests/test_iostream.py
+++ b/tests/test_iostream.py
@@ -69,6 +69,96 @@ def test_captured_large_string(capsys):
assert stderr == ""
+def test_captured_utf8_2byte_offset0(capsys):
+ msg = "\u07FF"
+ msg = "" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_2byte_offset1(capsys):
+ msg = "\u07FF"
+ msg = "1" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_3byte_offset0(capsys):
+ msg = "\uFFFF"
+ msg = "" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_3byte_offset1(capsys):
+ msg = "\uFFFF"
+ msg = "1" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_3byte_offset2(capsys):
+ msg = "\uFFFF"
+ msg = "12" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_4byte_offset0(capsys):
+ msg = "\U0010FFFF"
+ msg = "" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_4byte_offset1(capsys):
+ msg = "\U0010FFFF"
+ msg = "1" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_4byte_offset2(capsys):
+ msg = "\U0010FFFF"
+ msg = "12" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
+def test_captured_utf8_4byte_offset3(capsys):
+ msg = "\U0010FFFF"
+ msg = "123" + msg * (1024 // len(msg) + 1)
+
+ m.captured_output_default(msg)
+ stdout, stderr = capsys.readouterr()
+ assert stdout == msg
+ assert stderr == ""
+
+
def test_guard_capture(capsys):
msg = "I've been redirected to Python, I hope!"
m.guard_output(msg)