Accepting request 1294512 from devel:languages:python:Factory

DEPENDS ON SR#1294511, THEY HAVE TO GO TOGETHER!!!

- Fix gil/nogil package description, bsc#1246229

OBS-URL: https://build.opensuse.org/request/show/1294512
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python313?expand=0&rev=27
This commit is contained in:
2025-07-20 13:28:48 +00:00
committed by Git OBS Bridge
4 changed files with 327 additions and 3 deletions

View File

@@ -0,0 +1,247 @@
From 9043edabc7e2f0dd655146e0a4571e2a0b2906af Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Jun 2025 19:57:48 +0300
Subject: [PATCH] gh-135462: Fix quadratic complexity in processing special
input in HTMLParser (GH-135464)
End-of-file errors are now handled according to the HTML5 specs --
comments and declarations are automatically closed, tags are ignored.
(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/html/parser.py | 41 +++-
Lib/test/test_htmlparser.py | 97 +++++++---
Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4
3 files changed, 111 insertions(+), 31 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
Index: Python-3.13.5/Lib/html/parser.py
===================================================================
--- Python-3.13.5.orig/Lib/html/parser.py 2025-06-11 17:36:57.000000000 +0200
+++ Python-3.13.5/Lib/html/parser.py 2025-07-02 16:49:52.020175099 +0200
@@ -27,6 +27,7 @@
attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -195,7 +196,7 @@
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
- elif (i + 1) < n:
+ elif (i + 1) < n or end:
self.handle_data("<")
k = i + 1
else:
@@ -203,17 +204,35 @@
if k < 0:
if not end:
break
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<!--", i):
+ j = n
+ for suffix in ("--!", "--", "-"):
+ if rawdata.endswith(suffix, i+4):
+ j -= len(suffix)
+ break
+ self.handle_comment(rawdata[i+4:j])
+ elif startswith("<![CDATA[", i):
+ self.unknown_decl(rawdata[i+3:])
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ self.handle_decl(rawdata[i+2:])
+ elif startswith("<!", i):
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<?", i):
+ self.handle_pi(rawdata[i+2:])
else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
- else:
- self.handle_data(rawdata[i:k])
+ raise AssertionError("we should not get here!")
+ k = n
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
Index: Python-3.13.5/Lib/test/test_htmlparser.py
===================================================================
--- Python-3.13.5.orig/Lib/test/test_htmlparser.py 2025-06-11 17:36:57.000000000 +0200
+++ Python-3.13.5/Lib/test/test_htmlparser.py 2025-07-02 16:49:52.020821697 +0200
@@ -5,6 +5,7 @@
import unittest
from unittest.mock import patch
+from test import support
class EventCollector(html.parser.HTMLParser):
@@ -430,28 +431,34 @@
('data', '<'),
('starttag', 'bc<', [('a', None)]),
('endtag', 'html'),
- ('data', '\n<img src="URL>'),
- ('comment', '/img'),
- ('endtag', 'html<')])
+ ('data', '\n')])
def test_starttag_junk_chars(self):
+ self._run_check("<", [('data', '<')])
+ self._run_check("<>", [('data', '<>')])
+ self._run_check("< >", [('data', '< >')])
+ self._run_check("< ", [('data', '< ')])
self._run_check("</>", [])
+ self._run_check("<$>", [('data', '<$>')])
self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')])
- self._run_check("</a", [('data', '</a')])
+ self._run_check("</a", [])
+ self._run_check("</ a>", [('endtag', 'a')])
+ self._run_check("</ a", [('comment', ' a')])
self._run_check("<a<a>", [('starttag', 'a<a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')])
- self._run_check("<!", [('data', '<!')])
- self._run_check("<a", [('data', '<a')])
- self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
- self._run_check("<a foo='bar", [('data', "<a foo='bar")])
- self._run_check("<a foo='>'", [('data', "<a foo='>'")])
- self._run_check("<a foo='>", [('data', "<a foo='>")])
+ self._run_check("<!", [('comment', '')])
+ self._run_check("<a", [])
+ self._run_check("<a foo='bar'", [])
+ self._run_check("<a foo='bar", [])
+ self._run_check("<a foo='>'", [])
+ self._run_check("<a foo='>", [])
self._run_check("<a$>", [('starttag', 'a$', [])])
self._run_check("<a$b>", [('starttag', 'a$b', [])])
self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
+ self._run_check("</a$b>", [('endtag', 'a$b')])
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
@@ -576,21 +583,50 @@
for html, expected in data:
self._run_check(html, expected)
- def test_EOF_in_comments_or_decls(self):
+ def test_eof_in_comments(self):
data = [
- ('<!', [('data', '<!')]),
- ('<!-', [('data', '<!-')]),
- ('<!--', [('data', '<!--')]),
- ('<![', [('data', '<![')]),
- ('<![CDATA[', [('data', '<![CDATA[')]),
- ('<![CDATA[x', [('data', '<![CDATA[x')]),
- ('<!DOCTYPE', [('data', '<!DOCTYPE')]),
- ('<!DOCTYPE HTML', [('data', '<!DOCTYPE HTML')]),
+ ('<!--', [('comment', '')]),
+ ('<!---', [('comment', '')]),
+ ('<!----', [('comment', '')]),
+ ('<!-----', [('comment', '-')]),
+ ('<!------', [('comment', '--')]),
+ ('<!----!', [('comment', '')]),
+ ('<!---!', [('comment', '-!')]),
+ ('<!---!>', [('comment', '-!>')]),
+ ('<!--foo', [('comment', 'foo')]),
+ ('<!--foo-', [('comment', 'foo')]),
+ ('<!--foo--', [('comment', 'foo')]),
+ ('<!--foo--!', [('comment', 'foo')]),
+ ('<!--<!--', [('comment', '<!')]),
+ ('<!--<!--!', [('comment', '<!')]),
]
for html, expected in data:
self._run_check(html, expected)
+
+ def test_eof_in_declarations(self):
+ data = [
+ ('<!', [('comment', '')]),
+ ('<!-', [('comment', '-')]),
+ ('<![', [('comment', '[')]),
+ ('<![CDATA[', [('unknown decl', 'CDATA[')]),
+ ('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
+ ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
+ ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
+ ('<!DOCTYPE', [('decl', 'DOCTYPE')]),
+ ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
+ ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
+ ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]),
+ ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]),
+ ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]),
+ ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo',
+ [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
def test_bogus_comments(self):
- html = ('<! not really a comment >'
+ html = ('<!ELEMENT br EMPTY>'
+ '<! not really a comment >'
'<! not a comment either -->'
'<! -- close enough -->'
'<!><!<-- this was an empty comment>'
@@ -604,6 +640,7 @@
'<![CDATA]]>' # required '[' after CDATA
)
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -684,6 +721,26 @@
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("<a " * n)
+ check("<a a=" * n)
+ check("</a " * 14 * n)
+ check("</a a=" * 11 * n)
+ check("<!--" * 4 * n)
+ check("<!" * 60 * n)
+ check("<?" * 19 * n)
+ check("</$" * 15 * n)
+ check("<![CDATA[" * 9 * n)
+ check("<!doctype" * 35 * n)
+
class AttributesTestCase(TestCaseBase):
Index: Python-3.13.5/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.13.5/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst 2025-07-02 16:49:52.021124951 +0200
@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.

View File

@@ -0,0 +1,45 @@
From 906a590df191f66f4f0c4a70e3edb6fd82c156ef Mon Sep 17 00:00:00 2001
From: Daniel Garcia Moreno <daniel.garcia@suse.com>
Date: Tue, 1 Jul 2025 12:13:28 +0200
Subject: [PATCH] Doc: Generate ids for audit_events using docname
This patch generates ids for audit_events using the docname so the id is
not global but depend on the source file. This make the doc build
reproducible with multiple cores because it doesn't which file is parsed
first, the id for audit_events will always be consistent independently
of what file is parsed first.
https://github.com/python/cpython/issues/130979
---
Doc/tools/extensions/audit_events.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
Index: Python-3.13.5/Doc/tools/extensions/audit_events.py
===================================================================
--- Python-3.13.5.orig/Doc/tools/extensions/audit_events.py 2025-07-02 15:51:58.388560540 +0200
+++ Python-3.13.5/Doc/tools/extensions/audit_events.py 2025-07-02 15:51:58.411254070 +0200
@@ -72,8 +72,13 @@
logger.warning(msg)
return
- def id_for(self, name) -> str:
- source_count = len(self.sources.get(name, set()))
+ def _source_count(self, name, docname) -> int:
+ """Count the event name in the same source"""
+ sources = self.sources.get(name, set())
+ return len([s for s, t in sources if s == docname])
+
+ def id_for(self, name, docname) -> str:
+ source_count = self._source_count(name, docname)
name_clean = re.sub(r"\W", "_", name)
return f"audit_event_{name_clean}_{source_count}"
@@ -148,7 +153,7 @@
except (IndexError, TypeError):
target = None
if not target:
- target = self.env.audit_events.id_for(name)
+ target = self.env.audit_events.id_for(name, self.env.docname)
ids.append(target)
self.env.audit_events.add_event(name, args, (self.env.docname, target))

View File

@@ -1,3 +1,29 @@
-------------------------------------------------------------------
Thu Jul 10 09:33:26 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
- Fix gil/nogil package description, bsc#1246229
-------------------------------------------------------------------
Wed Jul 2 14:47:20 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst
case quadratic complexity when processing certain crafted
malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).
-------------------------------------------------------------------
Wed Jul 2 13:14:28 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
- Add bsc1243155-sphinx-non-determinism.patch (bsc#1243155) to
generate ids for audit_events using docname (reproducible
builds).
-------------------------------------------------------------------
Tue Jul 1 08:23:22 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
- Use one core to build doc. This will make sphinx doc build
reproducible.
bsc#1243155
-------------------------------------------------------------------
Sat Jun 21 14:32:16 UTC 2025 - Marcus Meissner <meissner@suse.com>

View File

@@ -228,6 +228,12 @@ Patch41: doc-py38-to-py36.patch
# PATCH-FIX-UPSTREAM gh126985-mv-pyvenv.cfg2getpath.patch mcepl@suse.com
# Remove tests failing in test_sysconfig
Patch42: gh126985-mv-pyvenv.cfg2getpath.patch
# PATCH-FIX-UPSTREAM bsc1243155-sphinx-non-determinism.patch bsc#1243155 mcepl@suse.com
# Doc: Generate ids for audit_events using docname
Patch43: bsc1243155-sphinx-non-determinism.patch
# PATCH-FIX-UPSTREAM CVE-2025-6069-quad-complex-HTMLParser.patch bsc#1244705 mcepl@suse.com
# avoid quadratic complexity when processing malformed inputs with HTMLParser
Patch44: CVE-2025-6069-quad-complex-HTMLParser.patch
BuildRequires: autoconf-archive
BuildRequires: automake
BuildRequires: fdupes
@@ -321,7 +327,7 @@ Installing "python3" is sufficient for the vast majority of usecases.
In addition, recommended packages provide UI toolkit support (python3-curses,
python3-tk), legacy UNIX database bindings (python3-dbm), and the IDLE
development environment (python3-idle).
%if %{with GIL}
%if %{without GIL}
This package has been built with the Global Interpreter Lock removed.
This feature is still considered to be experimental. This package is
@@ -442,7 +448,7 @@ This package contains the interpreter core and most commonly used modules
from the standard library. This is sufficient for many usecases, but it
excludes components that depend on external libraries, most notably XML,
database and UI toolkits support.
%if %{with GIL}
%if %{without GIL}
This package has been built with the Global Interpreter Lock removed.
This feature is still considered to be experimental. This package is
@@ -562,7 +568,7 @@ TODAY_DATE=`date -r %{SOURCE0} "+%%B %%d, %%Y"`
cd Doc
sed -i "s/^today = .*/today = '$TODAY_DATE'/" conf.py
%make_build -j1 html
%make_build -j1 JOBS=1 html
# Build also devhelp files
sphinx-build -a -b devhelp . build/devhelp