Accepting request 1294512 from devel:languages:python:Factory

DEPENDS ON SR#1294511, THEY HAVE TO GO TOGETHER!!! - Fix gil/nogil package description, bsc#1246229 OBS-URL: https://build.opensuse.org/request/show/1294512 OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python313?expand=0&rev=27
2025-07-20 13:28:48 +00:00
parent 92106b1aea 8fc89fce82
commit 00d0af4ebb
4 changed files with 327 additions and 3 deletions
--- a/CVE-2025-6069-quad-complex-HTMLParser.patch
+++ b/CVE-2025-6069-quad-complex-HTMLParser.patch
@@ -0,0 +1,247 @@
+From 9043edabc7e2f0dd655146e0a4571e2a0b2906af Mon Sep 17 00:00:00 2001
+From: Serhiy Storchaka <storchaka@gmail.com>
+Date: Fri, 13 Jun 2025 19:57:48 +0300
+Subject: [PATCH] gh-135462: Fix quadratic complexity in processing special
+ input in HTMLParser (GH-135464)
+
+End-of-file errors are now handled according to the HTML5 specs --
+comments and declarations are automatically closed, tags are ignored.
+(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
+
+Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
+---
+ Lib/html/parser.py                                                       |   41 +++-
+ Lib/test/test_htmlparser.py                                              |   97 +++++++---
+ Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst |    4 
+ 3 files changed, 111 insertions(+), 31 deletions(-)
+ create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
+
+Index: Python-3.13.5/Lib/html/parser.py
+===================================================================
+--- Python-3.13.5.orig/Lib/html/parser.py	2025-06-11 17:36:57.000000000 +0200
+++ Python-3.13.5/Lib/html/parser.py	2025-07-02 16:49:52.020175099 +0200
+@@ -27,6 +27,7 @@
+ attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?')
+ 
+ starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
+ piclose = re.compile('>')
+ commentclose = re.compile(r'--\s*>')
+ # Note:
+@@ -195,7 +196,7 @@
+                     k = self.parse_pi(i)
+                 elif startswith("<!", i):
+                     k = self.parse_html_declaration(i)
+-                elif (i + 1) < n:
+                elif (i + 1) < n or end:
+                     self.handle_data("<")
+                     k = i + 1
+                 else:
+@@ -203,17 +204,35 @@
+                 if k < 0:
+                     if not end:
+                         break
+-                    k = rawdata.find('>', i + 1)
+-                    if k < 0:
+-                        k = rawdata.find('<', i + 1)
+-                        if k < 0:
+-                            k = i + 1
+                    if starttagopen.match(rawdata, i):  # < + letter
+                        pass
+                    elif startswith("</", i):
+                        if i + 2 == n:
+                            self.handle_data("</")
+                        elif endtagopen.match(rawdata, i):  # </ + letter
+                            pass
+                        else:
+                            # bogus comment
+                            self.handle_comment(rawdata[i+2:])
+                    elif startswith("<!--", i):
+                        j = n
+                        for suffix in ("--!", "--", "-"):
+                            if rawdata.endswith(suffix, i+4):
+                                j -= len(suffix)
+                                break
+                        self.handle_comment(rawdata[i+4:j])
+                    elif startswith("<![CDATA[", i):
+                        self.unknown_decl(rawdata[i+3:])
+                    elif rawdata[i:i+9].lower() == '<!doctype':
+                        self.handle_decl(rawdata[i+2:])
+                    elif startswith("<!", i):
+                        # bogus comment
+                        self.handle_comment(rawdata[i+2:])
+                    elif startswith("<?", i):
+                        self.handle_pi(rawdata[i+2:])
+                     else:
+-                        k += 1
+-                    if self.convert_charrefs and not self.cdata_elem:
+-                        self.handle_data(unescape(rawdata[i:k]))
+-                    else:
+-                        self.handle_data(rawdata[i:k])
+                        raise AssertionError("we should not get here!")
+                    k = n
+                 i = self.updatepos(i, k)
+             elif startswith("&#", i):
+                 match = charref.match(rawdata, i)
+Index: Python-3.13.5/Lib/test/test_htmlparser.py
+===================================================================
+--- Python-3.13.5.orig/Lib/test/test_htmlparser.py	2025-06-11 17:36:57.000000000 +0200
+++ Python-3.13.5/Lib/test/test_htmlparser.py	2025-07-02 16:49:52.020821697 +0200
+@@ -5,6 +5,7 @@
+ import unittest
+ 
+ from unittest.mock import patch
+from test import support
+ 
+ 
+ class EventCollector(html.parser.HTMLParser):
+@@ -430,28 +431,34 @@
+                             ('data', '<'),
+                             ('starttag', 'bc<', [('a', None)]),
+                             ('endtag', 'html'),
+-                            ('data', '\n<img src="URL>'),
+-                            ('comment', '/img'),
+-                            ('endtag', 'html<')])
+                            ('data', '\n')])
+ 
+     def test_starttag_junk_chars(self):
+        self._run_check("<", [('data', '<')])
+        self._run_check("<>", [('data', '<>')])
+        self._run_check("< >", [('data', '< >')])
+        self._run_check("< ", [('data', '< ')])
+         self._run_check("</>", [])
+        self._run_check("<$>", [('data', '<$>')])
+         self._run_check("</$>", [('comment', '$')])
+         self._run_check("</", [('data', '</')])
+-        self._run_check("</a", [('data', '</a')])
+        self._run_check("</a", [])
+        self._run_check("</ a>", [('endtag', 'a')])
+        self._run_check("</ a", [('comment', ' a')])
+         self._run_check("<a<a>", [('starttag', 'a<a', [])])
+         self._run_check("</a<a>", [('endtag', 'a<a')])
+-        self._run_check("<!", [('data', '<!')])
+-        self._run_check("<a", [('data', '<a')])
+-        self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
+-        self._run_check("<a foo='bar", [('data', "<a foo='bar")])
+-        self._run_check("<a foo='>'", [('data', "<a foo='>'")])
+-        self._run_check("<a foo='>", [('data', "<a foo='>")])
+        self._run_check("<!", [('comment', '')])
+        self._run_check("<a", [])
+        self._run_check("<a foo='bar'", [])
+        self._run_check("<a foo='bar", [])
+        self._run_check("<a foo='>'", [])
+        self._run_check("<a foo='>", [])
+         self._run_check("<a$>", [('starttag', 'a$', [])])
+         self._run_check("<a$b>", [('starttag', 'a$b', [])])
+         self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
+         self._run_check("<a$b  >", [('starttag', 'a$b', [])])
+         self._run_check("<a$b  />", [('startendtag', 'a$b', [])])
+        self._run_check("</a$b>", [('endtag', 'a$b')])
+ 
+     def test_slashes_in_starttag(self):
+         self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
+@@ -576,21 +583,50 @@
+         for html, expected in data:
+             self._run_check(html, expected)
+ 
+-    def test_EOF_in_comments_or_decls(self):
+    def test_eof_in_comments(self):
+         data = [
+-            ('<!', [('data', '<!')]),
+-            ('<!-', [('data', '<!-')]),
+-            ('<!--', [('data', '<!--')]),
+-            ('<![', [('data', '<![')]),
+-            ('<![CDATA[', [('data', '<![CDATA[')]),
+-            ('<![CDATA[x', [('data', '<![CDATA[x')]),
+-            ('<!DOCTYPE', [('data', '<!DOCTYPE')]),
+-            ('<!DOCTYPE HTML', [('data', '<!DOCTYPE HTML')]),
+            ('<!--', [('comment', '')]),
+            ('<!---', [('comment', '')]),
+            ('<!----', [('comment', '')]),
+            ('<!-----', [('comment', '-')]),
+            ('<!------', [('comment', '--')]),
+            ('<!----!', [('comment', '')]),
+            ('<!---!', [('comment', '-!')]),
+            ('<!---!>', [('comment', '-!>')]),
+            ('<!--foo', [('comment', 'foo')]),
+            ('<!--foo-', [('comment', 'foo')]),
+            ('<!--foo--', [('comment', 'foo')]),
+            ('<!--foo--!', [('comment', 'foo')]),
+            ('<!--<!--', [('comment', '<!')]),
+            ('<!--<!--!', [('comment', '<!')]),
+         ]
+         for html, expected in data:
+             self._run_check(html, expected)
+
+    def test_eof_in_declarations(self):
+        data = [
+            ('<!', [('comment', '')]),
+            ('<!-', [('comment', '-')]),
+            ('<![', [('comment', '[')]),
+            ('<![CDATA[', [('unknown decl', 'CDATA[')]),
+            ('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
+            ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
+            ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
+            ('<!DOCTYPE', [('decl', 'DOCTYPE')]),
+            ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
+            ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
+            ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]),
+            ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]),
+            ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]),
+            ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo',
+             [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]),
+        ]
+        for html, expected in data:
+            self._run_check(html, expected)
+
+     def test_bogus_comments(self):
+-        html = ('<! not really a comment >'
+        html = ('<!ELEMENT br EMPTY>'
+                '<! not really a comment >'
+                 '<! not a comment either -->'
+                 '<! -- close enough -->'
+                 '<!><!<-- this was an empty comment>'
+@@ -604,6 +640,7 @@
+                 '<![CDATA]]>'  # required '[' after CDATA
+         )
+         expected = [
+            ('comment', 'ELEMENT br EMPTY'),
+             ('comment', ' not really a comment '),
+             ('comment', ' not a comment either --'),
+             ('comment', ' -- close enough --'),
+@@ -684,6 +721,26 @@
+              ('endtag', 'a'), ('data', ' bar & baz')]
+         )
+ 
+    @support.requires_resource('cpu')
+    def test_eof_no_quadratic_complexity(self):
+        # Each of these examples used to take about an hour.
+        # Now they take a fraction of a second.
+        def check(source):
+            parser = html.parser.HTMLParser()
+            parser.feed(source)
+            parser.close()
+        n = 120_000
+        check("<a " * n)
+        check("<a a=" * n)
+        check("</a " * 14 * n)
+        check("</a a=" * 11 * n)
+        check("<!--" * 4 * n)
+        check("<!" * 60 * n)
+        check("<?" * 19 * n)
+        check("</$" * 15 * n)
+        check("<![CDATA[" * 9 * n)
+        check("<!doctype" * 35 * n)
+
+ 
+ class AttributesTestCase(TestCaseBase):
+ 
+Index: Python-3.13.5/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ Python-3.13.5/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst	2025-07-02 16:49:52.021124951 +0200
+@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.
--- a/bsc1243155-sphinx-non-determinism.patch
+++ b/bsc1243155-sphinx-non-determinism.patch
@@ -0,0 +1,45 @@
+From 906a590df191f66f4f0c4a70e3edb6fd82c156ef Mon Sep 17 00:00:00 2001
+From: Daniel Garcia Moreno <daniel.garcia@suse.com>
+Date: Tue, 1 Jul 2025 12:13:28 +0200
+Subject: [PATCH] Doc: Generate ids for audit_events using docname
+
+This patch generates ids for audit_events using the docname, so the id is
+not global but depends on the source file. This makes the doc build
+reproducible with multiple cores because it doesn't matter which file is
+parsed first: the id for audit_events will always be consistent,
+independently of which file is parsed first.
+
+https://github.com/python/cpython/issues/130979
+---
+ Doc/tools/extensions/audit_events.py |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+Index: Python-3.13.5/Doc/tools/extensions/audit_events.py
+===================================================================
+--- Python-3.13.5.orig/Doc/tools/extensions/audit_events.py	2025-07-02 15:51:58.388560540 +0200
+++ Python-3.13.5/Doc/tools/extensions/audit_events.py	2025-07-02 15:51:58.411254070 +0200
+@@ -72,8 +72,13 @@
+             logger.warning(msg)
+             return
+ 
+-    def id_for(self, name) -> str:
+-        source_count = len(self.sources.get(name, set()))
+    def _source_count(self, name, docname) -> int:
+        """Count the event name in the same source"""
+        sources = self.sources.get(name, set())
+        return len([s for s, t in sources if s == docname])
+
+    def id_for(self, name, docname) -> str:
+        source_count = self._source_count(name, docname)
+         name_clean = re.sub(r"\W", "_", name)
+         return f"audit_event_{name_clean}_{source_count}"
+ 
+@@ -148,7 +153,7 @@
+         except (IndexError, TypeError):
+             target = None
+         if not target:
+-            target = self.env.audit_events.id_for(name)
+            target = self.env.audit_events.id_for(name, self.env.docname)
+             ids.append(target)
+         self.env.audit_events.add_event(name, args, (self.env.docname, target))
+ 
--- a/python313.changes
+++ b/python313.changes
@@ -1,3 +1,29 @@
+-------------------------------------------------------------------
+Thu Jul 10 09:33:26 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
+
+- Fix gil/nogil package description, bsc#1246229
+
+-------------------------------------------------------------------
+Wed Jul  2 14:47:20 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
+
+- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid
+  worst-case quadratic complexity when processing certain crafted
+  malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).
+
+-------------------------------------------------------------------
+Wed Jul  2 13:14:28 UTC 2025 - Matej Cepl <mcepl@cepl.eu>
+
+- Add bsc1243155-sphinx-non-determinism.patch (bsc#1243155) to
+  generate ids for audit_events using docname (reproducible
+  builds).
+
+-------------------------------------------------------------------
+Tue Jul  1 08:23:22 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
+
+- Use one core to build the documentation. This makes the Sphinx
+  doc build reproducible.
+  (bsc#1243155)
+
 -------------------------------------------------------------------
 Sat Jun 21 14:32:16 UTC 2025 - Marcus Meissner <meissner@suse.com>

--- a/python313.spec
+++ b/python313.spec
@@ -228,6 +228,12 @@ Patch41:        doc-py38-to-py36.patch
 # PATCH-FIX-UPSTREAM gh126985-mv-pyvenv.cfg2getpath.patch mcepl@suse.com
 # Remove tests failing in test_sysconfig
 Patch42:        gh126985-mv-pyvenv.cfg2getpath.patch
+# PATCH-FIX-UPSTREAM bsc1243155-sphinx-non-determinism.patch bsc#1243155 mcepl@suse.com
+# Doc: Generate ids for audit_events using docname
+Patch43:        bsc1243155-sphinx-non-determinism.patch
+# PATCH-FIX-UPSTREAM CVE-2025-6069-quad-complex-HTMLParser.patch bsc#1244705 mcepl@suse.com
+# avoid quadratic complexity when processing malformed inputs with HTMLParser
+Patch44:        CVE-2025-6069-quad-complex-HTMLParser.patch
 BuildRequires:  autoconf-archive
 BuildRequires:  automake
 BuildRequires:  fdupes
@@ -321,7 +327,7 @@ Installing "python3" is sufficient for the vast majority of usecases.
 In addition, recommended packages provide UI toolkit support (python3-curses,
 python3-tk), legacy UNIX database bindings (python3-dbm), and the IDLE
 development environment (python3-idle).
-%if %{with GIL}
+%if %{without GIL}

 This package has been built with the Global Interpreter Lock removed.
 This feature is still considered to be experimental. This package is
@@ -442,7 +448,7 @@ This package contains the interpreter core and most commonly used modules
 from the standard library. This is sufficient for many usecases, but it
 excludes components that depend on external libraries, most notably XML,
 database and UI toolkits support.
-%if %{with GIL}
+%if %{without GIL}

 This package has been built with the Global Interpreter Lock removed.
 This feature is still considered to be experimental. This package is
@@ -562,7 +568,7 @@ TODAY_DATE=`date -r %{SOURCE0} "+%%B %%d, %%Y"`

 cd Doc
 sed -i "s/^today = .*/today = '$TODAY_DATE'/" conf.py
-%make_build -j1 html
+%make_build -j1 JOBS=1 html

 # Build also devhelp files
 sphinx-build -a -b devhelp . build/devhelp