7
0

1 Commits

Author SHA256 Message Date
b987fcb855 Align with bundle:testing at OBS 2026-01-08 15:37:21 +00:00
4 changed files with 433 additions and 2 deletions

View File

@@ -0,0 +1,190 @@
From 9043edabc7e2f0dd655146e0a4571e2a0b2906af Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Jun 2025 19:57:48 +0300
Subject: [PATCH] gh-135462: Fix quadratic complexity in processing special
input in HTMLParser (GH-135464)
End-of-file errors are now handled according to the HTML5 specs --
comments and declarations are automatically closed, tags are ignored.
(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/html/parser.py | 41 +++++---
Lib/test/test_htmlparser.py | 51 +++++++---
Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4
3 files changed, 74 insertions(+), 22 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
Index: Python-3.11.13/Lib/html/parser.py
===================================================================
--- Python-3.11.13.orig/Lib/html/parser.py 2025-07-02 18:12:07.084569398 +0200
+++ Python-3.11.13/Lib/html/parser.py 2025-07-02 18:12:12.582519793 +0200
@@ -25,6 +25,7 @@
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -176,7 +177,7 @@
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
- elif (i + 1) < n:
+ elif (i + 1) < n or end:
self.handle_data("<")
k = i + 1
else:
@@ -184,17 +185,35 @@
if k < 0:
if not end:
break
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<!--", i):
+ j = n
+ for suffix in ("--!", "--", "-"):
+ if rawdata.endswith(suffix, i+4):
+ j -= len(suffix)
+ break
+ self.handle_comment(rawdata[i+4:j])
+ elif startswith("<![CDATA[", i):
+ self.unknown_decl(rawdata[i+3:])
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ self.handle_decl(rawdata[i+2:])
+ elif startswith("<!", i):
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<?", i):
+ self.handle_pi(rawdata[i+2:])
else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
- else:
- self.handle_data(rawdata[i:k])
+ raise AssertionError("we should not get here!")
+ k = n
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
Index: Python-3.11.13/Lib/test/test_htmlparser.py
===================================================================
--- Python-3.11.13.orig/Lib/test/test_htmlparser.py 2025-07-02 18:12:08.523658593 +0200
+++ Python-3.11.13/Lib/test/test_htmlparser.py 2025-07-02 18:13:32.674943007 +0200
@@ -4,6 +4,8 @@
import pprint
import unittest
+from test import support
+
class EventCollector(html.parser.HTMLParser):
@@ -391,28 +393,34 @@
('data', '<'),
('starttag', 'bc<', [('a', None)]),
('endtag', 'html'),
- ('data', '\n<img src="URL>'),
- ('comment', '/img'),
- ('endtag', 'html<')])
+ ('data', '\n')])
def test_starttag_junk_chars(self):
+ self._run_check("<", [('data', '<')])
+ self._run_check("<>", [('data', '<>')])
+ self._run_check("< >", [('data', '< >')])
+ self._run_check("< ", [('data', '< ')])
self._run_check("</>", [])
+ self._run_check("<$>", [('data', '<$>')])
self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')])
- self._run_check("</a", [('data', '</a')])
+ self._run_check("</a", [])
+ self._run_check("</ a>", [('endtag', 'a')])
+ self._run_check("</ a", [('comment', ' a')])
self._run_check("<a<a>", [('starttag', 'a<a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')])
- self._run_check("<!", [('data', '<!')])
- self._run_check("<a", [('data', '<a')])
- self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
- self._run_check("<a foo='bar", [('data', "<a foo='bar")])
- self._run_check("<a foo='>'", [('data', "<a foo='>'")])
- self._run_check("<a foo='>", [('data', "<a foo='>")])
+ self._run_check("<!", [('comment', '')])
+ self._run_check("<a", [])
+ self._run_check("<a foo='bar'", [])
+ self._run_check("<a foo='bar", [])
+ self._run_check("<a foo='>'", [])
+ self._run_check("<a foo='>", [])
self._run_check("<a$>", [('starttag', 'a$', [])])
self._run_check("<a$b>", [('starttag', 'a$b', [])])
self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
+ self._run_check("</a$b>", [('endtag', 'a$b')])
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
@@ -549,8 +557,9 @@
('comment', ' -- close enough --'),
('comment', ''),
('comment', '<-- this was an empty comment'),
- ('comment', '!! another bogus comment !!!'),
+ ('comment', '!! another bogus comment !!!')
]
+
self._run_check(html, expected)
def test_broken_condcoms(self):
@@ -598,6 +607,26 @@
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("<a " * n)
+ check("<a a=" * n)
+ check("</a " * 14 * n)
+ check("</a a=" * 11 * n)
+ check("<!--" * 4 * n)
+ check("<!" * 60 * n)
+ check("<?" * 19 * n)
+ check("</$" * 15 * n)
+ check("<![CDATA[" * 9 * n)
+ check("<!doctype" * 35 * n)
+
class AttributesTestCase(TestCaseBase):
Index: Python-3.11.13/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.11.13/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst 2025-07-02 18:12:12.583386736 +0200
@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.

View File

@@ -0,0 +1,212 @@
From cb3519590c62f9b1abf7f31b92ec37d4b725ce15 Mon Sep 17 00:00:00 2001
From: Alexander Urieles <aeurielesn@users.noreply.github.com>
Date: Mon, 28 Jul 2025 17:37:26 +0200
Subject: [PATCH] gh-130577: tarfile now validates archives to ensure member
offsets are non-negative (GH-137027) (cherry picked from commit
7040aa54f14676938970e10c5f74ea93cd56aa38)
Co-authored-by: Alexander Urieles <aeurielesn@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
---
Lib/tarfile.py | 3
Lib/test/test_tarfile.py | 156 ++++++++++
Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 3
3 files changed, 162 insertions(+)
create mode 100644 Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst
Index: Python-3.11.13/Lib/tarfile.py
===================================================================
--- Python-3.11.13.orig/Lib/tarfile.py 2025-08-01 22:21:29.158050900 +0200
+++ Python-3.11.13/Lib/tarfile.py 2025-08-01 22:21:33.121079687 +0200
@@ -1613,6 +1613,9 @@
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
"""
+ # Only non-negative offsets are allowed
+ if count < 0:
+ raise InvalidHeaderError("invalid offset")
blocks, remainder = divmod(count, BLOCKSIZE)
if remainder:
blocks += 1
Index: Python-3.11.13/Lib/test/test_tarfile.py
===================================================================
--- Python-3.11.13.orig/Lib/test/test_tarfile.py 2025-08-01 22:21:30.644301786 +0200
+++ Python-3.11.13/Lib/test/test_tarfile.py 2025-08-01 22:21:33.121718600 +0200
@@ -50,6 +50,7 @@
xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
tmpname = os.path.join(TEMPDIR, "tmp.tar")
dotlessname = os.path.join(TEMPDIR, "testtar")
+SPACE = b" "
sha256_regtype = (
"e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
@@ -4386,6 +4387,161 @@
ar.extractall(self.testdir, filter='fully_trusted')
+class OffsetValidationTests(unittest.TestCase):
+ tarname = tmpname
+ invalid_posix_header = (
+ # name: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_NAME
+ # mode, space, null terminator: 8 bytes
+ + b"000755" + SPACE + tarfile.NUL
+ # uid, space, null terminator: 8 bytes
+ + b"000001" + SPACE + tarfile.NUL
+ # gid, space, null terminator: 8 bytes
+ + b"000001" + SPACE + tarfile.NUL
+ # size, space: 12 bytes
+ + b"\xff" * 11 + SPACE
+ # mtime, space: 12 bytes
+ + tarfile.NUL * 11 + SPACE
+ # chksum: 8 bytes
+ + b"0011407" + tarfile.NUL
+ # type: 1 byte
+ + tarfile.REGTYPE
+ # linkname: 100 bytes
+ + tarfile.NUL * tarfile.LENGTH_LINK
+ # magic: 6 bytes, version: 2 bytes
+ + tarfile.POSIX_MAGIC
+ # uname: 32 bytes
+ + tarfile.NUL * 32
+ # gname: 32 bytes
+ + tarfile.NUL * 32
+ # devmajor, space, null terminator: 8 bytes
+ + tarfile.NUL * 6 + SPACE + tarfile.NUL
+ # devminor, space, null terminator: 8 bytes
+ + tarfile.NUL * 6 + SPACE + tarfile.NUL
+ # prefix: 155 bytes
+ + tarfile.NUL * tarfile.LENGTH_PREFIX
+ # padding: 12 bytes
+ + tarfile.NUL * 12
+ )
+ invalid_gnu_header = (
+ # name: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_NAME
+ # mode, null terminator: 8 bytes
+ + b"0000755" + tarfile.NUL
+ # uid, null terminator: 8 bytes
+ + b"0000001" + tarfile.NUL
+ # gid, space, null terminator: 8 bytes
+ + b"0000001" + tarfile.NUL
+ # size, space: 12 bytes
+ + b"\xff" * 11 + SPACE
+ # mtime, space: 12 bytes
+ + tarfile.NUL * 11 + SPACE
+ # chksum: 8 bytes
+ + b"0011327" + tarfile.NUL
+ # type: 1 byte
+ + tarfile.REGTYPE
+ # linkname: 100 bytes
+ + tarfile.NUL * tarfile.LENGTH_LINK
+ # magic: 8 bytes
+ + tarfile.GNU_MAGIC
+ # uname: 32 bytes
+ + tarfile.NUL * 32
+ # gname: 32 bytes
+ + tarfile.NUL * 32
+ # devmajor, null terminator: 8 bytes
+ + tarfile.NUL * 8
+ # devminor, null terminator: 8 bytes
+ + tarfile.NUL * 8
+ # padding: 167 bytes
+ + tarfile.NUL * 167
+ )
+ invalid_v7_header = (
+ # name: 100 bytes
+ tarfile.NUL * tarfile.LENGTH_NAME
+ # mode, space, null terminator: 8 bytes
+ + b"000755" + SPACE + tarfile.NUL
+ # uid, space, null terminator: 8 bytes
+ + b"000001" + SPACE + tarfile.NUL
+ # gid, space, null terminator: 8 bytes
+ + b"000001" + SPACE + tarfile.NUL
+ # size, space: 12 bytes
+ + b"\xff" * 11 + SPACE
+ # mtime, space: 12 bytes
+ + tarfile.NUL * 11 + SPACE
+ # chksum: 8 bytes
+ + b"0010070" + tarfile.NUL
+ # type: 1 byte
+ + tarfile.REGTYPE
+ # linkname: 100 bytes
+ + tarfile.NUL * tarfile.LENGTH_LINK
+ # padding: 255 bytes
+ + tarfile.NUL * 255
+ )
+ valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)
+ data_block = b"\xff" * tarfile.BLOCKSIZE
+
+ def _write_buffer(self, buffer):
+ with open(self.tarname, "wb") as f:
+ f.write(buffer)
+
+ def _get_members(self, ignore_zeros=None):
+ with open(self.tarname, "rb") as f:
+ with tarfile.open(
+ mode="r", fileobj=f, ignore_zeros=ignore_zeros
+ ) as tar:
+ return tar.getmembers()
+
+ def _assert_raises_read_error_exception(self):
+ with self.assertRaisesRegex(
+ tarfile.ReadError, "file could not be opened successfully"
+ ):
+ self._get_members()
+
+ def test_invalid_offset_header_validations(self):
+ for tar_format, invalid_header in (
+ ("posix", self.invalid_posix_header),
+ ("gnu", self.invalid_gnu_header),
+ ("v7", self.invalid_v7_header),
+ ):
+ with self.subTest(format=tar_format):
+ self._write_buffer(invalid_header)
+ self._assert_raises_read_error_exception()
+
+ def test_early_stop_at_invalid_offset_header(self):
+ buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header
+ self._write_buffer(buffer)
+ members = self._get_members()
+ self.assertEqual(len(members), 1)
+ self.assertEqual(members[0].name, "filename")
+ self.assertEqual(members[0].offset, 0)
+
+ def test_ignore_invalid_archive(self):
+ # 3 invalid headers with their respective data
+ buffer = (self.invalid_gnu_header + self.data_block) * 3
+ self._write_buffer(buffer)
+ members = self._get_members(ignore_zeros=True)
+ self.assertEqual(len(members), 0)
+
+ def test_ignore_invalid_offset_headers(self):
+ for first_block, second_block, expected_offset in (
+ (
+ (self.valid_gnu_header),
+ (self.invalid_gnu_header + self.data_block),
+ 0,
+ ),
+ (
+ (self.invalid_gnu_header + self.data_block),
+ (self.valid_gnu_header),
+ 1024,
+ ),
+ ):
+ self._write_buffer(first_block + second_block)
+ members = self._get_members(ignore_zeros=True)
+ self.assertEqual(len(members), 1)
+ self.assertEqual(members[0].name, "filename")
+ self.assertEqual(members[0].offset, expected_offset)
+
+
def setUpModule():
os_helper.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
Index: Python-3.11.13/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ Python-3.11.13/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst 2025-08-01 22:21:33.122108946 +0200
@@ -0,0 +1,3 @@
+:mod:`tarfile` now validates archives to ensure member offsets are
+non-negative. (Contributed by Alexander Enrique Urieles Nieto in
+:gh:`130577`.)

View File

@@ -1,3 +1,22 @@
-------------------------------------------------------------------
Thu Oct 2 07:33:30 UTC 2025 - Victor Zhestkov <vzhestkov@suse.com>
- Add CVE-2025-8194-tarfile-no-neg-offsets.patch which now
validates archives to ensure member offsets are non-negative
(gh#python/cpython#130577, CVE-2025-8194, bsc#1247249).
- Add CVE-2025-6069-quad-complex-HTMLParser.patch to avoid worst
case quadratic complexity when processing certain crafted
malformed inputs with HTMLParser (CVE-2025-6069, bsc#1244705).
- Extend comment regarding usage of __brp_python_bytecompile macro
as we need to prevent the situation when wrong python interpreter
used for bytecompile.
- Added:
* CVE-2025-8194-tarfile-no-neg-offsets.patch
* CVE-2025-6069-quad-complex-HTMLParser.patch
-------------------------------------------------------------------
Tue Jul 15 11:02:35 UTC 2025 - Victor Zhestkov <vzhestkov@suse.com>

View File

@@ -16,8 +16,10 @@
#
# Disable python bytecompile for all distros
# It's called explicitly in the spec
# Disable python bytecompile with brp modules for all distros
# as we need enforce using the python from this package to prevent
# the situation when wrong python interpreter is used for it.
# The bytecompile called explicitly in the spec during build time.
%global __brp_python_bytecompile %{nil}
%define build_with_opt 1
@@ -196,6 +198,12 @@ Patch19: bso1227999-reproducible-builds.patch
Patch22: gh120226-fix-sendfile-test-kernel-610.patch
# PATCH-FIX-UPSTREAM Add platform triplets for 64-bit LoongArch gh#python/cpython#30939 glaubitz@suse.com
Patch24: add-loongarch64-support.patch
# PATCH-FIX-UPSTREAM CVE-2025-6069-quad-complex-HTMLParser.patch bsc#1244705 mcepl@suse.com
# avoid quadratic complexity when processing malformed inputs with HTMLParser
Patch25: CVE-2025-6069-quad-complex-HTMLParser.patch
# PATCH-FIX-UPSTREAM CVE-2025-8194-tarfile-no-neg-offsets.patch bsc#1247249 mcepl@suse.com
# tarfile now validates archives to ensure member offsets are non-negative
Patch26: CVE-2025-8194-tarfile-no-neg-offsets.patch
# Salt Bundle related patches
Patch100: call-startup-script-always.patch
# PATCH-FIX-SLE no-skipif-doctests.patch jsc#SLE-13738 mcepl@suse.com
@@ -522,6 +530,8 @@ other applications.
%patch -p1 -P 19
%patch -p1 -P 22
%patch -p1 -P 24
%patch -p1 -P 25
%patch -p1 -P 26
%patch -p1 -P 100
%if 0%{?suse_version} <= 1500