From 37c1ce1f87b11b4bbceb147ad002e0daf5f7b73abc76c89b0b0d5e140e324f79 Mon Sep 17 00:00:00 2001 From: Antonio Larrosa Date: Thu, 8 Jul 2021 10:06:06 +0000 Subject: [PATCH] Accepting request 904606 from home:mcalabkova:branches:devel:languages:python - Add upstream patch unicode.patch to fix random python crash OBS-URL: https://build.opensuse.org/request/show/904606 OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-pybind11?expand=0&rev=27 --- python-pybind11.changes | 5 + python-pybind11.spec | 5 +- unicode.patch | 224 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 unicode.patch diff --git a/python-pybind11.changes b/python-pybind11.changes index 381476d..d85062f 100644 --- a/python-pybind11.changes +++ b/python-pybind11.changes @@ -1,3 +1,8 @@ +------------------------------------------------------------------- +Wed Jul 7 14:30:20 UTC 2021 - Markéta Machová + +- Add upstream patch unicode.patch to fix random python crash + ------------------------------------------------------------------- Mon Feb 15 00:56:32 UTC 2021 - Benjamin Greiner diff --git a/python-pybind11.spec b/python-pybind11.spec index 3552cd8..af74c16 100644 --- a/python-pybind11.spec +++ b/python-pybind11.spec @@ -24,14 +24,16 @@ Summary: Module for operability between C++11 and Python License: BSD-3-Clause URL: https://github.com/pybind/pybind11 Source: https://github.com/pybind/pybind11/archive/v%{version}.tar.gz#/pybind11-%{version}.tar.gz +# PATCH-FIX-UPSTREAM https://github.com/pybind/pybind11/commit/0c93a0f3fcf6bf26be584558d7426564720cea6f Fix Unicode support for ostream redirects +Patch0: unicode.patch BuildRequires: %{python_module devel} -BuildRequires: %{python_module numpy if (%python-base without python36-base)} BuildRequires: %{python_module pytest} BuildRequires: %{python_module setuptools} BuildRequires: cmake BuildRequires: fdupes BuildRequires: gcc-c++ BuildRequires: python-rpm-macros +BuildRequires: %{python_module numpy if (%python-base without python36-base)} BuildArch: noarch %python_subpackages @@ -59,6 +61,7 @@ This package contains files for developing applications using pybind11. %prep %setup -q -n pybind11-%{version} +%autopatch -p1 %build %python_build diff --git a/unicode.patch b/unicode.patch new file mode 100644 index 0000000..7f730b4 --- /dev/null +++ b/unicode.patch @@ -0,0 +1,224 @@ +From 0c93a0f3fcf6bf26be584558d7426564720cea6f Mon Sep 17 00:00:00 2001 +From: Pieter P +Date: Tue, 4 May 2021 07:04:38 +0200 +Subject: [PATCH] Fix Unicode support for ostream redirects (#2982) + +* Crash when printing Unicode to redirected cout +Add failing tests + +* Fix Unicode crashes redirected cout + +* pythonbuf::utf8_remainder check end iterator + +* Remove trailing whitespace and formatting iostream + +* Avoid buffer overflow if ostream redirect races +This doesn't solve the actual race, but at least it now has a much lower +probability of reading past the end of the buffer even when data races +do occur. +--- + include/pybind11/iostream.h | 75 ++++++++++++++++++++++++++----- + tests/test_iostream.py | 90 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 153 insertions(+), 12 deletions(-) + +diff --git a/include/pybind11/iostream.h b/include/pybind11/iostream.h +index 9dee755431..7c1c718b02 100644 +--- a/include/pybind11/iostream.h ++++ b/include/pybind11/iostream.h +@@ -16,6 +16,9 @@ + #include + #include + #include ++#include ++#include ++#include + + PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + PYBIND11_NAMESPACE_BEGIN(detail) +@@ -38,25 +41,73 @@ class pythonbuf : public std::streambuf { + return sync() == 0 ? traits_type::not_eof(c) : traits_type::eof(); + } + ++ // Computes how many bytes at the end of the buffer are part of an ++ // incomplete sequence of UTF-8 bytes. ++ // Precondition: pbase() < pptr() ++ size_t utf8_remainder() const { ++ const auto rbase = std::reverse_iterator(pbase()); ++ const auto rpptr = std::reverse_iterator(pptr()); ++ auto is_ascii = [](char c) { ++ return (static_cast(c) & 0x80) == 0x00; ++ }; ++ auto is_leading = [](char c) { ++ return (static_cast(c) & 0xC0) == 0xC0; ++ }; ++ auto is_leading_2b = [](char c) { ++ return static_cast(c) <= 0xDF; ++ }; ++ auto is_leading_3b = [](char c) { ++ return static_cast(c) <= 0xEF; ++ }; ++ // If the last character is ASCII, there are no incomplete code points ++ if (is_ascii(*rpptr)) ++ return 0; ++ // Otherwise, work back from the end of the buffer and find the first ++ // UTF-8 leading byte ++ const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase; ++ const auto leading = std::find_if(rpptr, rpend, is_leading); ++ if (leading == rbase) ++ return 0; ++ const auto dist = static_cast(leading - rpptr); ++ size_t remainder = 0; ++ ++ if (dist == 0) ++ remainder = 1; // 1-byte code point is impossible ++ else if (dist == 1) ++ remainder = is_leading_2b(*leading) ? 0 : dist + 1; ++ else if (dist == 2) ++ remainder = is_leading_3b(*leading) ? 0 : dist + 1; ++ // else if (dist >= 3), at least 4 bytes before encountering an UTF-8 ++ // leading byte, either no remainder or invalid UTF-8. ++ // Invalid UTF-8 will cause an exception later when converting ++ // to a Python string, so that's not handled here. ++ return remainder; ++ } ++ + // This function must be non-virtual to be called in a destructor. If the + // rare MSVC test failure shows up with this version, then this should be + // simplified to a fully qualified call. + int _sync() { +- if (pbase() != pptr()) { +- +- { +- gil_scoped_acquire tmp; +- ++ if (pbase() != pptr()) { // If buffer is not empty ++ gil_scoped_acquire tmp; ++ // Placed inside gil_scoped_acquire as a mutex to avoid a race. ++ if (pbase() != pptr()) { // Check again under the lock + // This subtraction cannot be negative, so dropping the sign. +- str line(pbase(), static_cast(pptr() - pbase())); +- +- pywrite(line); +- pyflush(); +- +- // Placed inside gil_scoped_aquire as a mutex to avoid a race ++ auto size = static_cast(pptr() - pbase()); ++ size_t remainder = utf8_remainder(); ++ ++ if (size > remainder) { ++ str line(pbase(), size - remainder); ++ pywrite(line); ++ pyflush(); ++ } ++ ++ // Copy the remainder at the end of the buffer to the beginning: ++ if (remainder > 0) ++ std::memmove(pbase(), pptr() - remainder, remainder); + setp(pbase(), epptr()); ++ pbump(static_cast(remainder)); + } +- + } + return 0; + } +diff --git a/tests/test_iostream.py b/tests/test_iostream.py +index 6d493beda3..e2b74d01cb 100644 +--- a/tests/test_iostream.py ++++ b/tests/test_iostream.py +@@ -69,6 +69,96 @@ def test_captured_large_string(capsys): + assert stderr == "" + + ++def test_captured_utf8_2byte_offset0(capsys): ++ msg = "\u07FF" ++ msg = "" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_2byte_offset1(capsys): ++ msg = "\u07FF" ++ msg = "1" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_3byte_offset0(capsys): ++ msg = "\uFFFF" ++ msg = "" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_3byte_offset1(capsys): ++ msg = "\uFFFF" ++ msg = "1" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_3byte_offset2(capsys): ++ msg = "\uFFFF" ++ msg = "12" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_4byte_offset0(capsys): ++ msg = "\U0010FFFF" ++ msg = "" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_4byte_offset1(capsys): ++ msg = "\U0010FFFF" ++ msg = "1" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_4byte_offset2(capsys): ++ msg = "\U0010FFFF" ++ msg = "12" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ ++def test_captured_utf8_4byte_offset3(capsys): ++ msg = "\U0010FFFF" ++ msg = "123" + msg * (1024 // len(msg) + 1) ++ ++ m.captured_output_default(msg) ++ stdout, stderr = capsys.readouterr() ++ assert stdout == msg ++ assert stderr == "" ++ ++ + def test_guard_capture(capsys): + msg = "I've been redirected to Python, I hope!" + m.guard_output(msg)