diff --git a/.gitattributes b/.gitattributes
index 23f9fd3..9b03811 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -21,5 +21,3 @@
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-## Specific LFS patterns
-CVE-2025-6176-testfile-bomb-br-64GiB.bin filter=lfs diff=lfs merge=lfs -text
diff --git a/CVE-2025-6176-testfile-bomb-br-64GiB.bin b/CVE-2025-6176-testfile-bomb-br-64GiB.bin
deleted file mode 100644
index be6bfda..0000000
--- a/CVE-2025-6176-testfile-bomb-br-64GiB.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5d5b6139298c899595f784cdd36ff195dbdd479504c4a48d2a8e0a43d2e7a03d
-size 51713
diff --git a/CVE-2025-6176.patch b/CVE-2025-6176.patch
index 3330258..657e8df 100644
--- a/CVE-2025-6176.patch
+++ b/CVE-2025-6176.patch
@@ -1,16 +1,36 @@
-From e3673d5a42cdd8be95c09982240317af1410fea3 Mon Sep 17 00:00:00 2001
-From: Rui Xi
-Date: Thu, 6 Nov 2025 18:53:35 +0800
-Subject: [PATCH 01/18] mitigate brotli decompression bomb
-
-drop brotlicffi
----
- .../downloadermiddlewares/httpcompression.py | 10 +--
- scrapy/utils/_compression.py | 75 +++++--------------
- scrapy/utils/gz.py | 9 +--
- ...st_downloadermiddleware_httpcompression.py | 16 +---
- 4 files changed, 29 insertions(+), 81 deletions(-)
-
+Index: scrapy-2.13.3/conftest.py
+===================================================================
+--- scrapy-2.13.3.orig/conftest.py
++++ scrapy-2.13.3/conftest.py
+@@ -116,6 +116,16 @@ def requires_boto3(request):
+         pytest.skip("boto3 is not installed")
+ 
+ 
++@pytest.fixture(autouse=True)
++def requires_mitmproxy(request):
++    if not request.node.get_closest_marker("requires_mitmproxy"):
++        return
++    try:
++        import mitmproxy  # noqa: F401, PLC0415
++    except ImportError:
++        pytest.skip("mitmproxy is not installed")
++
++
+ def pytest_configure(config):
+     if config.getoption("--reactor") != "default":
+         install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
+Index: scrapy-2.13.3/pyproject.toml
+===================================================================
+--- scrapy-2.13.3.orig/pyproject.toml
++++ scrapy-2.13.3/pyproject.toml
+@@ -242,6 +242,7 @@ markers = [
+     "requires_uvloop: marks tests as only enabled when uvloop is known to be working",
+     "requires_botocore: marks tests that need botocore (but not boto3)",
+     "requires_boto3: marks tests that need botocore and boto3",
++    "requires_mitmproxy: marks tests that need mitmproxy",
+ ]
+ filterwarnings = [
+     "ignore::DeprecationWarning:twisted.web.static"
 Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
 ===================================================================
 --- scrapy-2.13.3.orig/scrapy/downloadermiddlewares/httpcompression.py
 +++ scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
@@ -30,7 +50,7 @@ Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
 -            ACCEPTED_ENCODINGS.append(b"br")
 +        try:
 +            brotli.Decompressor.can_accept_more_data
-+        except AttributeError:
++        except AttributeError:  # pragma: no cover
 +            warnings.warn(
 +                "You have brotli installed. But 'br' encoding support now requires "
 +                "brotli version >= 1.2.0. Please upgrade brotli version to make Scrapy "
@@ -129,7 +149,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +
 +    def __str__(self) -> str:
 +        return (
-+            "The number of bytes decompressed so far "
++            f"The number of bytes decompressed so far "
 +            f"({self.decompressed_size} B) exceeded the specified maximum "
 +            f"({self.max_size} B)."
 +        )
@@ -147,7 +167,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +    try:
 +        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
 +    except zlib.error:
-+        # to work with raw deflate content that may sent by microsoft servers.
++        # to work with raw deflate content that may be sent by microsoft servers.
 +        decompressor = zlib.decompressobj(wbits=-15)
 +        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
 +    decompressed_size = len(first_chunk)
@@ -209,7 +229,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +    while not decompressor.is_finished():
 +        output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
 +        if not output_chunk:
-+            raise ValueError("Truncated brotli compressed data")
++            break
          decompressed_size += len(output_chunk)
 -        if max_size and decompressed_size > max_size:
 -            raise _DecompressionMaxSizeExceeded(
@@ -275,19 +295,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 ===================================================================
 --- scrapy-2.13.3.orig/tests/test_downloadermiddleware_httpcompression.py
 +++ scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
-@@ -2,7 +2,6 @@ from gzip import GzipFile
- from io import BytesIO
- from logging import WARNING
- from pathlib import Path
--from unittest import SkipTest
- 
- import pytest
- from testfixtures import LogCapture
-@@ -48,9 +47,26 @@ FORMAT = {
-             "zstd",  # 1 096 → 11 511 612
-         )
-     },
-+    "bomb-br-64GiB": ("bomb-br-64GiB.bin", "br"),  # 51K → 64 GiB decompression bomb
+@@ -51,6 +51,22 @@ FORMAT = {
  }
 
 +def _skip_if_no_br() -> None:
 +    try:
 +        import brotli  # noqa: PLC0415
 +
 +        brotli.Decompressor.can_accept_more_data  # noqa: B018
 +    except (ImportError, AttributeError):
 +        pytest.skip("no brotli support")
 +
 +
 +def _skip_if_no_zstd() -> None:
 +    try:
 +        import zstandard  # noqa: F401,PLC0415
 +    except ImportError:
 +        pytest.skip("no zstd support (zstandard)")
 +
 +
  class TestHttpCompression:
      def setup_method(self):
          self.crawler = get_crawler(Spider)
@@ -124,13 +140,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          self.assertStatsEqual("httpcompression/response_bytes", 74837)
 
      def test_process_response_br(self):
 -        try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
 +
          response = self._getresponse("br")
          request = response.request
          assert response.headers["Content-Encoding"] == b"br"
@@ -143,14 +153,8 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
      def test_process_response_br_unsupported(self):
          try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 --
 -            raise SkipTest("Requires not having brotli support")
 ++            import brotli  # noqa: F401,PLC0415
 +            pytest.skip("Requires not having brotli support")
          except ImportError:
              pass
          response = self._getresponse("br")
@@ -169,7 +173,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
              (
                  "HttpCompressionMiddleware cannot decode the response for"
                  " http://scrapytest.org/ from unsupported encoding(s) 'br'."
@@ -352,32 +359,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          ),
      ),
  )
-@@ -177,10 +183,8 @@
-         assert newresponse.headers.getlist("Content-Encoding") == [b"br"]
- 
-     def test_process_response_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         raw_content = None
-         for check_key in FORMAT:
-             if not check_key.startswith("zstd-"):
-@@ -199,9 +203,9 @@
- 
-     def test_process_response_zstd_unsupported(self):
-         try:
--            import zstandard  # noqa: F401
-+            import zstandard  # noqa: F401,PLC0415
- 
--            raise SkipTest("Requires not having zstandard support")
-+            pytest.skip("Requires not having zstandard support")
-         except ImportError:
-             pass
-         response = self._getresponse("zstd-static-content-size")
-@@ -503,24 +507,20 @@
+@@ -503,24 +507,19 @@
          self.assertStatsEqual("httpcompression/response_bytes", None)
 
      def _test_compression_bomb_setting(self, compression_id):
 -        settings = {"DOWNLOAD_MAXSIZE": 10_000_000}
 +        settings = {"DOWNLOAD_MAXSIZE": 1_000_000}
          crawler = get_crawler(Spider, settings_dict=settings)
          spider = crawler._create_spider("scrapytest.org")
          mw = HttpCompressionMiddleware.from_crawler(crawler)
          mw.open_spider(spider)
 
 -        response = self._getresponse(f"bomb-{compression_id}")
 -        with pytest.raises(IgnoreRequest):
 --            mw.process_response(response.request, response, spider)
 +        response = self._getresponse(f"bomb-{compression_id}")  # 11_511_612 B
 +        with pytest.raises(IgnoreRequest) as exc_info:
 +            mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
 
      def test_compression_bomb_setting_br(self):
 -        try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
 
          self._test_compression_bomb_setting("br")
 
      def test_compression_bomb_setting_deflate(self):
-@@ -530,15 +530,13 @@
-         self._test_compression_bomb_setting("gzip")
- 
-     def test_compression_bomb_setting_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_compression_bomb_setting("zstd")
+@@ -538,7 +537,7 @@
      def _test_compression_bomb_spider_attr(self, compression_id):
          class DownloadMaxSizeSpider(Spider):
 -            download_maxsize = 10_000_000
 +            download_maxsize = 1_000_000
 
          crawler = get_crawler(DownloadMaxSizeSpider)
          spider = crawler._create_spider("scrapytest.org")
@@ -546,17 +545,12 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          mw.open_spider(spider)
 
          response = self._getresponse(f"bomb-{compression_id}")
 -        with pytest.raises(IgnoreRequest):
 --            mw.process_response(response.request, response, spider)
 +        with pytest.raises(IgnoreRequest) as exc_info:
 +            mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
 
 +    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_br(self):
 -        try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
 
          self._test_compression_bomb_spider_attr("br")
 
 +    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_deflate(self):
          self._test_compression_bomb_spider_attr("deflate")
 
 +    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_gzip(self):
          self._test_compression_bomb_spider_attr("gzip")
 
 +    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_zstd(self):
 --        try:
 --            import zstandard  # noqa: F401
 --        except ImportError:
 --            raise SkipTest("no zstd support (zstandard)")
 -+        _skip_if_no_zstd()
 -+
          self._test_compression_bomb_spider_attr("zstd")
 
      def _test_compression_bomb_request_meta(self, compression_id):
@@ -579,18 +573,13 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          mw.open_spider(spider)
 
          response = self._getresponse(f"bomb-{compression_id}")
 -        response.meta["download_maxsize"] = 10_000_000
 -        with pytest.raises(IgnoreRequest):
 --            mw.process_response(response.request, response, spider)
 +        response.meta["download_maxsize"] = 1_000_000
 +        with pytest.raises(IgnoreRequest) as exc_info:
 +            mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
 
      def test_compression_bomb_request_meta_br(self):
 -        try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
 
          self._test_compression_bomb_request_meta("br")
 
      def test_compression_bomb_request_meta_deflate(self):
-@@ -600,12 +592,38 @@
-         self._test_compression_bomb_request_meta("gzip")
- 
-     def test_compression_bomb_request_meta_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_compression_bomb_request_meta("zstd")
- 
-+    def test_compression_bomb_output_buffer_limit(self):
-+        """Test that the 64 GiB brotli decompression bomb is properly handled.
-+
-+        This test ensures that the output_buffer_limit parameter in the brotli
-+        decompressor prevents the decompression bomb attack. The bomb file is
-+        approximately 51 KB compressed but would decompress to 64 GiB, which
-+        should trigger IgnoreRequest when DOWNLOAD_MAXSIZE is exceeded.
-+ """ -+ _skip_if_no_br() -+ -+ settings = {"DOWNLOAD_MAXSIZE": 10_000_000} # 10 MB limit -+ crawler = get_crawler(Spider, settings_dict=settings) -+ spider = crawler._create_spider("scrapytest.org") -+ mw = HttpCompressionMiddleware.from_crawler(crawler) -+ mw.open_spider(spider) -+ -+ response = self._getresponse("bomb-br-64GiB") -+ -+ # Verify the response is properly configured -+ assert response.headers["Content-Encoding"] == b"br" -+ -+ # The middleware should raise IgnoreRequest due to exceeding DOWNLOAD_MAXSIZE -+ with pytest.raises(IgnoreRequest) as exc_info: -+ mw.process_response(response.request, response, self.spider) -+ -+ # Verify the exception message mentions the download size limits -+ assert "exceeded DOWNLOAD_MAXSIZE (10000000 B)" in str(exc_info.value) -+ - def _test_download_warnsize_setting(self, compression_id): - settings = {"DOWNLOAD_WARNSIZE": 10_000_000} - crawler = get_crawler(Spider, settings_dict=settings) -@@ -619,7 +637,7 @@ class TestHttpCompression: - propagate=False, - level=WARNING, - ) as log: -- mw.process_response(response.request, response, spider) -+ mw.process_response(response.request, response, self.spider) - log.check( - ( - "scrapy.downloadermiddlewares.httpcompression", -@@ -633,13 +651,8 @@ class TestHttpCompression: +@@ -633,13 +622,7 @@ class TestHttpCompression: ) def test_download_warnsize_setting_br(self): @@ -559,37 +454,12 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py - except ImportError: - raise SkipTest("no brotli") + _skip_if_no_br() -+ self._test_download_warnsize_setting("br") def test_download_warnsize_setting_deflate(self): -@@ -649,10 +662,8 @@ class TestHttpCompression: - self._test_download_warnsize_setting("gzip") - - def test_download_warnsize_setting_zstd(self): -- try: -- import zstandard # noqa: F401 -- except ImportError: -- raise SkipTest("no zstd support (zstandard)") -+ _skip_if_no_zstd() -+ - self._test_download_warnsize_setting("zstd") - - def _test_download_warnsize_spider_attr(self, compression_id): -@@ -670,7 +681,7 @@ class TestHttpCompression: - propagate=False, - level=WARNING, - ) as log: -- mw.process_response(response.request, response, spider) -+ mw.process_response(response.request, response, self.spider) - log.check( - ( - "scrapy.downloadermiddlewares.httpcompression", -@@ -683,27 +694,24 @@ class TestHttpCompression: - ), +@@ -684,13 +667,7 @@ class TestHttpCompression: ) -+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") def test_download_warnsize_spider_attr_br(self): - try: - try: @@ -599,38 +469,10 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py - except ImportError: - raise SkipTest("no brotli") + _skip_if_no_br() -+ self._test_download_warnsize_spider_attr("br") -+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") def test_download_warnsize_spider_attr_deflate(self): - self._test_download_warnsize_spider_attr("deflate") - -+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") - def test_download_warnsize_spider_attr_gzip(self): - self._test_download_warnsize_spider_attr("gzip") - -+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") - def test_download_warnsize_spider_attr_zstd(self): -- try: -- import zstandard # noqa: F401 -- except ImportError: -- raise SkipTest("no zstd support (zstandard)") -+ _skip_if_no_zstd() -+ - self._test_download_warnsize_spider_attr("zstd") - - def 
      def _test_download_warnsize_request_meta(self, compression_id):
-@@ -719,7 +727,7 @@
-                 propagate=False,
-                 level=WARNING,
-             ) as log:
--                mw.process_response(response.request, response, spider)
-+                mw.process_response(response.request, response, self.spider)
-             log.check(
-                 (
-                     "scrapy.downloadermiddlewares.httpcompression",
@@ -733,13 +710,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          )
 
      def test_download_warnsize_request_meta_br(self):
 -        try:
 -            try:
 -                import brotli  # noqa: F401
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
 
          self._test_download_warnsize_request_meta("br")
 
      def test_download_warnsize_request_meta_deflate(self):
@@ -754,3 +725,34 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          except ImportError:
              raise SkipTest("no zstd support (zstandard)")
          self._test_download_warnsize_request_meta("zstd")
++
++    def _get_truncated_response(self, compression_id):
++        crawler = get_crawler(Spider)
++        spider = crawler._create_spider("scrapytest.org")
++        mw = HttpCompressionMiddleware.from_crawler(crawler)
++        mw.open_spider(spider)
++        response = self._getresponse(compression_id)
++        truncated_body = response.body[: len(response.body) // 2]
++        response = response.replace(body=truncated_body)
++        return mw.process_response(response.request, response, spider)
++
++    def test_process_truncated_response_br(self):
++        _skip_if_no_br()
++        resp = self._get_truncated_response("br")
++        assert resp.body.startswith(b"=1.4.87
+ commands =
+-    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy -m requires_botocore}
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_botocore
+ 
+ [testenv:botocore-pinned]
+ basepython = {[pinned]basepython}
+@@ -269,4 +263,17 @@ install_command = {[pinned]install_comma
+ setenv =
+     {[pinned]setenv}
+ commands =
+-    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy -m requires_botocore}
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy} -m requires_botocore
++
++
++# Run proxy tests that use mitmproxy in a separate env to avoid installing
++# numerous mitmproxy deps in other envs (even in extra-deps), as they can
++# conflict with other deps we want, or don't want, to have installed there.
++
++[testenv:mitmproxy]
++deps =
++    {[testenv]deps}
++    # mitmproxy does not support PyPy
++    mitmproxy; implementation_name != "pypy"
++commands =
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_mitmproxy
diff --git a/python-Scrapy.changes b/python-Scrapy.changes
index b4a68aa..1b8f86f 100644
--- a/python-Scrapy.changes
+++ b/python-Scrapy.changes
@@ -1,3 +1,12 @@
+-------------------------------------------------------------------
+Mon Nov 17 10:58:13 UTC 2025 - Daniel Garcia
+
+- Update CVE-2025-6176.patch to reflect the latest upstream changes
+  to the patch.
+- Remove the CVE-2025-6176-testfile-bomb-br-64GiB.bin source, it's not
+  needed anymore.
+  (gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)
+
 -------------------------------------------------------------------
 Wed Nov 12 12:28:41 UTC 2025 - Daniel Garcia
 
diff --git a/python-Scrapy.spec b/python-Scrapy.spec
index 8ee17ee..75e74b6 100644
--- a/python-Scrapy.spec
+++ b/python-Scrapy.spec
@@ -38,9 +38,6 @@ Summary:        A high-level Python Screen Scraping framework
 License:        BSD-3-Clause
 URL:            https://scrapy.org
 Source:         https://files.pythonhosted.org/packages/source/s/scrapy/scrapy-%{version}.tar.gz
-# New test file added in the gh#scrapy/scrapy#7134, needed for Patch2
-# related to CVE-2025-6176
-Source1:        CVE-2025-6176-testfile-bomb-br-64GiB.bin
 # PATCH-FIX-UPSTREAM gh#scrapy/scrapy#6922
 Patch0:         remove-hoverxref.patch
 # PATCH-FIX-OPENSUSE No sphinx-rtd-dark-mode
@@ -54,7 +51,7 @@ BuildRequires:  %{python_module wheel}
 %if %{with test}
 # Test requirements:
 BuildRequires:  %{python_module Scrapy = %{version}}
-BuildRequires:  %{python_module Brotli}
+BuildRequires:  %{python_module Brotli >= 1.2.0}
 BuildRequires:  %{python_module Pillow}
 BuildRequires:  %{python_module Protego}
 BuildRequires:  %{python_module PyDispatcher >= 2.0.5}
@@ -146,8 +143,6 @@ popd
 %if %{with test}
 %check
-cp %{SOURCE1} tests/sample_data/compressed/bomb-br-64GiB.bin
-
 # no color in obs chroot console
 skiplist="test_pformat"
 # no online connection to toscrapy.com