Accepting request 1294513 from devel:languages:python:Factory

DEPENDS ON SR#1294511, THEY HAVE TO GO TOGETHER!!!

- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst
  case quadratic complexity when processing certain crafted
  malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).

- Add bsc1243155-sphinx-non-determinism.patch (bsc#1243155) to
  generate ids for audit_events using docname (reproducible
  builds).

- Use one core to build doc. This will make sphinx doc build
  reproducible.
  bsc#1243155
- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst
  case quadratic complexity when processing certain crafted
  malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).

- Add bsc1243155-sphinx-non-determinism.patch (bsc#1243155) to
  generate ids for audit_events using docname (reproducible
  builds).

- Use one core to build doc. This will make sphinx doc build
  reproducible.
  bsc#1243155

OBS-URL: https://build.opensuse.org/request/show/1294513
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python312?expand=0&rev=36
This commit is contained in:
2025-07-20 13:28:50 +00:00
committed by Git OBS Bridge
4 changed files with 310 additions and 1 deletions

View File

@@ -0,0 +1,237 @@
From 1d53c3e7343bddb064182e02c21b13be9b63390f Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Jun 2025 19:57:48 +0300
Subject: [PATCH] [3.12] gh-135462: Fix quadratic complexity in processing
special input in HTMLParser (GH-135464)
End-of-file errors are now handled according to the HTML5 specs --
comments and declarations are automatically closed, tags are ignored.
(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/html/parser.py | 41 +++-
Lib/test/test_htmlparser.py | 94 ++++++++--
Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4
3 files changed, 116 insertions(+), 23 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
Index: Python-3.12.11/Lib/html/parser.py
===================================================================
--- Python-3.12.11.orig/Lib/html/parser.py 2025-07-02 17:09:00.904899297 +0200
+++ Python-3.12.11/Lib/html/parser.py 2025-07-02 17:09:12.496469955 +0200
@@ -25,6 +25,7 @@
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -177,7 +178,7 @@
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
- elif (i + 1) < n:
+ elif (i + 1) < n or end:
self.handle_data("<")
k = i + 1
else:
@@ -185,17 +186,35 @@
if k < 0:
if not end:
break
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<!--", i):
+ j = n
+ for suffix in ("--!", "--", "-"):
+ if rawdata.endswith(suffix, i+4):
+ j -= len(suffix)
+ break
+ self.handle_comment(rawdata[i+4:j])
+ elif startswith("<![CDATA[", i):
+ self.unknown_decl(rawdata[i+3:])
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ self.handle_decl(rawdata[i+2:])
+ elif startswith("<!", i):
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<?", i):
+ self.handle_pi(rawdata[i+2:])
else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
- else:
- self.handle_data(rawdata[i:k])
+ raise AssertionError("we should not get here!")
+ k = n
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
Index: Python-3.12.11/Lib/test/test_htmlparser.py
===================================================================
--- Python-3.12.11.orig/Lib/test/test_htmlparser.py 2025-07-02 17:09:02.385825386 +0200
+++ Python-3.12.11/Lib/test/test_htmlparser.py 2025-07-02 17:09:12.496693449 +0200
@@ -5,6 +5,7 @@
import unittest
from unittest.mock import patch
+from test import support
class EventCollector(html.parser.HTMLParser):
@@ -393,28 +394,34 @@
('data', '<'),
('starttag', 'bc<', [('a', None)]),
('endtag', 'html'),
- ('data', '\n<img src="URL>'),
- ('comment', '/img'),
- ('endtag', 'html<')])
+ ('data', '\n')])
def test_starttag_junk_chars(self):
+ self._run_check("<", [('data', '<')])
+ self._run_check("<>", [('data', '<>')])
+ self._run_check("< >", [('data', '< >')])
+ self._run_check("< ", [('data', '< ')])
self._run_check("</>", [])
+ self._run_check("<$>", [('data', '<$>')])
self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')])
- self._run_check("</a", [('data', '</a')])
+ self._run_check("</a", [])
+ self._run_check("</ a>", [('endtag', 'a')])
+ self._run_check("</ a", [('comment', ' a')])
self._run_check("<a<a>", [('starttag', 'a<a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')])
- self._run_check("<!", [('data', '<!')])
- self._run_check("<a", [('data', '<a')])
- self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
- self._run_check("<a foo='bar", [('data', "<a foo='bar")])
- self._run_check("<a foo='>'", [('data', "<a foo='>'")])
- self._run_check("<a foo='>", [('data', "<a foo='>")])
+ self._run_check("<!", [('comment', '')])
+ self._run_check("<a", [])
+ self._run_check("<a foo='bar'", [])
+ self._run_check("<a foo='bar", [])
+ self._run_check("<a foo='>'", [])
+ self._run_check("<a foo='>", [])
self._run_check("<a$>", [('starttag', 'a$', [])])
self._run_check("<a$b>", [('starttag', 'a$b', [])])
self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
+ self._run_check("</a$b>", [('endtag', 'a$b')])
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
@@ -539,13 +546,56 @@
for html, expected in data:
self._run_check(html, expected)
- def test_broken_comments(self):
- html = ('<! not really a comment >'
+ def test_eof_in_comments(self):
+ data = [
+ ('<!--', [('comment', '')]),
+ ('<!---', [('comment', '')]),
+ ('<!----', [('comment', '')]),
+ ('<!-----', [('comment', '-')]),
+ ('<!------', [('comment', '--')]),
+ ('<!----!', [('comment', '')]),
+ ('<!---!', [('comment', '-!')]),
+ ('<!---!>', [('comment', '-!>')]),
+ ('<!--foo', [('comment', 'foo')]),
+ ('<!--foo-', [('comment', 'foo')]),
+ ('<!--foo--', [('comment', 'foo')]),
+ ('<!--foo--!', [('comment', 'foo')]),
+ ('<!--<!--', [('comment', '<!')]),
+ ('<!--<!--!', [('comment', '<!')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_eof_in_declarations(self):
+ data = [
+ ('<!', [('comment', '')]),
+ ('<!-', [('comment', '-')]),
+ ('<![', [('comment', '[')]),
+ ('<![CDATA[', [('unknown decl', 'CDATA[')]),
+ ('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
+ ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
+ ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
+ ('<!DOCTYPE', [('decl', 'DOCTYPE')]),
+ ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
+ ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
+ ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]),
+ ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]),
+ ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]),
+ ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo',
+ [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_bogus_comments(self):
+ html = ('<!ELEMENT br EMPTY>'
+ '<! not really a comment >'
'<! not a comment either -->'
'<! -- close enough -->'
'<!><!<-- this was an empty comment>'
'<!!! another bogus comment !!!>')
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -600,6 +650,26 @@
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("<a " * n)
+ check("<a a=" * n)
+ check("</a " * 14 * n)
+ check("</a a=" * 11 * n)
+ check("<!--" * 4 * n)
+ check("<!" * 60 * n)
+ check("<?" * 19 * n)
+ check("</$" * 15 * n)
+ check("<![CDATA[" * 9 * n)
+ check("<!doctype" * 35 * n)
+
class AttributesTestCase(TestCaseBase):
Index: Python-3.12.11/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.12.11/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst 2025-07-02 17:09:12.496947674 +0200
@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.

View File

@@ -0,0 +1,45 @@
From 906a590df191f66f4f0c4a70e3edb6fd82c156ef Mon Sep 17 00:00:00 2001
From: Daniel Garcia Moreno <daniel.garcia@suse.com>
Date: Tue, 1 Jul 2025 12:13:28 +0200
Subject: [PATCH] Doc: Generate ids for audit_events using docname
This patch generates ids for audit_events using the docname so the id is
not global but depend on the source file. This make the doc build
reproducible with multiple cores because it doesn't which file is parsed
first, the id for audit_events will always be consistent independently
of what file is parsed first.
https://github.com/python/cpython/issues/130979
---
Doc/tools/extensions/audit_events.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
Index: Python-3.12.11/Doc/tools/extensions/audit_events.py
===================================================================
--- Python-3.12.11.orig/Doc/tools/extensions/audit_events.py 2025-07-02 16:10:55.447792590 +0200
+++ Python-3.12.11/Doc/tools/extensions/audit_events.py 2025-07-02 16:12:00.124596479 +0200
@@ -64,8 +64,13 @@
logger.warning(msg)
return
- def id_for(self, name) -> str:
- source_count = len(self.sources.get(name, ()))
+ def _source_count(self, name, docname) -> int:
+ """Count the event name in the same source"""
+ sources = self.sources.get(name, set())
+ return len([s for s, t in sources if s == docname])
+
+ def id_for(self, name, docname) -> str:
+ source_count = self._source_count(name, docname)
name_clean = re.sub(r"\W", "_", name)
return f"audit_event_{name_clean}_{source_count}"
@@ -140,7 +145,7 @@
except (IndexError, TypeError):
target = None
if not target:
- target = self.env.audit_events.id_for(name)
+ target = self.env.audit_events.id_for(name, self.env.docname)
ids.append(target)
self.env.audit_events.add_event(name, args, (self.env.docname, target))

View File

@@ -1,3 +1,24 @@
-------------------------------------------------------------------
Wed Jul 2 14:47:20 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst
case quadratic complexity when processing certain crafted
malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).
-------------------------------------------------------------------
Wed Jul 2 13:14:28 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
- Add bsc1243155-sphinx-non-determinism.patch (bsc#1243155) to
generate ids for audit_events using docname (reproducible
builds).
-------------------------------------------------------------------
Tue Jul 1 08:19:52 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
- Use one core to build doc. This will make sphinx doc build
reproducible.
bsc#1243155
-------------------------------------------------------------------
Mon Jun 9 19:41:07 UTC 2025 - Matej Cepl <mcepl@cepl.eu>

View File

@@ -184,6 +184,12 @@ Patch41: docs-docutils_014-Sphinx_420.patch
# PATCH-FIX-SLE doc-py38-to-py36.patch mcepl@suse.com
# Make documentation extensions working with Python 3.6
Patch44: doc-py38-to-py36.patch
# PATCH-FIX-UPSTREAM bsc1243155-sphinx-non-determinism.patch bsc#1243155 mcepl@suse.com
# Doc: Generate ids for audit_events using docname
Patch45: bsc1243155-sphinx-non-determinism.patch
# PATCH-FIX-UPSTREAM CVE-2025-6069-quad-complex-HTMLParser.patch bsc#1244705 mcepl@suse.com
# avoid quadratic complexity when processing malformed inputs with HTMLParser
Patch46: CVE-2025-6069-quad-complex-HTMLParser.patch
BuildRequires: autoconf-archive
BuildRequires: automake
BuildRequires: fdupes
@@ -486,7 +492,7 @@ TODAY_DATE=`date -r %{SOURCE0} "+%%B %%d, %%Y"`
cd Doc
sed -i "s/^today = .*/today = '$TODAY_DATE'/" conf.py
%make_build -j1 html
%make_build -j1 JOBS=1 html
# Build also devhelp files
sphinx-build -a -b devhelp . build/devhelp