diff --git a/.gitattributes b/.gitattributes
index 9b03811..23f9fd3 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -21,3 +21,5 @@
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
+## Specific LFS patterns
+CVE-2025-6176-testfile-bomb-br-64GiB.bin filter=lfs diff=lfs merge=lfs -text
diff --git a/CVE-2025-6176-testfile-bomb-br-64GiB.bin b/CVE-2025-6176-testfile-bomb-br-64GiB.bin
new file mode 100644
index 0000000..be6bfda
--- /dev/null
+++ b/CVE-2025-6176-testfile-bomb-br-64GiB.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d5b6139298c899595f784cdd36ff195dbdd479504c4a48d2a8e0a43d2e7a03d
+size 51713
diff --git a/CVE-2025-6176.patch b/CVE-2025-6176.patch
new file mode 100644
index 0000000..3330258
--- /dev/null
+++ b/CVE-2025-6176.patch
@@ -0,0 +1,684 @@
+From e3673d5a42cdd8be95c09982240317af1410fea3 Mon Sep 17 00:00:00 2001
+From: Rui Xi
+Date: Thu, 6 Nov 2025 18:53:35 +0800
+Subject: [PATCH 01/18] mitigate brotli decompression bomb
+
+drop brotlicffi
+---
+ .../downloadermiddlewares/httpcompression.py  | 10 +--
+ scrapy/utils/_compression.py                  | 75 +++++--------
+ scrapy/utils/gz.py                            |  9 +--
+ ...st_downloadermiddleware_httpcompression.py | 16 +---
+ 4 files changed, 29 insertions(+), 81 deletions(-)
+
+Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
+===================================================================
+--- scrapy-2.13.3.orig/scrapy/downloadermiddlewares/httpcompression.py
++++ scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
+@@ -29,14 +29,20 @@ logger = getLogger(__name__)
+ ACCEPTED_ENCODINGS: list[bytes] = [b"gzip", b"deflate"]
+
+ try:
+-    try:
+-        import brotli  # noqa: F401
+-    except ImportError:
+-        import brotlicffi  # noqa: F401
++    import brotli
+ except ImportError:
+     pass
+ else:
+-    ACCEPTED_ENCODINGS.append(b"br")
++    try:
++        brotli.Decompressor.can_accept_more_data
++    except AttributeError:
++        warnings.warn(
++            "You have brotli installed. But 'br' encoding support now requires "
++            "brotli version >= 1.2.0. Please upgrade brotli version to make Scrapy "
++            "decode 'br' encoded responses.",
++        )
++    else:
++        ACCEPTED_ENCODINGS.append(b"br")
+
+ try:
+     import zstandard  # noqa: F401
+@@ -98,13 +104,13 @@ class HttpCompressionMiddleware:
+                 decoded_body, content_encoding = self._handle_encoding(
+                     response.body, content_encoding, max_size
+                 )
+-            except _DecompressionMaxSizeExceeded:
++            except _DecompressionMaxSizeExceeded as e:
+                 raise IgnoreRequest(
+                     f"Ignored response {response} because its body "
+-                    f"({len(response.body)} B compressed) exceeded "
+-                    f"DOWNLOAD_MAXSIZE ({max_size} B) during "
+-                    f"decompression."
+-                )
++                    f"({len(response.body)} B compressed, "
++                    f"{e.decompressed_size} B decompressed so far) exceeded "
++                    f"DOWNLOAD_MAXSIZE ({max_size} B) during decompression."
++                ) from e
+             if len(response.body) < warn_size <= len(decoded_body):
+                 logger.warning(
+                     f"{response} body size after decompression "
+@@ -187,7 +193,7 @@ class HttpCompressionMiddleware:
+                     f"from unsupported encoding(s) '{encodings_str}'."
+                 )
+                 if b"br" in encodings:
+-                    msg += " You need to install brotli or brotlicffi to decode 'br'."
++                    msg += " You need to install brotli >= 1.2.0 to decode 'br'."
+                 if b"zstd" in encodings:
+                     msg += " You need to install zstandard to decode 'zstd'."
+                 logger.warning(msg)
+Index: scrapy-2.13.3/scrapy/utils/_compression.py
+===================================================================
+--- scrapy-2.13.3.orig/scrapy/utils/_compression.py
++++ scrapy-2.13.3/scrapy/utils/_compression.py
+@@ -1,42 +1,9 @@
+ import contextlib
+ import zlib
+ from io import BytesIO
+-from warnings import warn
+-
+-from scrapy.exceptions import ScrapyDeprecationWarning
+-
+-try:
+-    try:
+-        import brotli
+-    except ImportError:
+-        import brotlicffi as brotli
+-except ImportError:
+-    pass
+-else:
+-    try:
+-        brotli.Decompressor.process
+-    except AttributeError:
+-        warn(
+-            (
+-                "You have brotlipy installed, and Scrapy will use it, but "
+-                "Scrapy support for brotlipy is deprecated and will stop "
+-                "working in a future version of Scrapy. brotlipy itself is "
+-                "deprecated, it has been superseded by brotlicffi. Please, "
+-                "uninstall brotlipy and install brotli or brotlicffi instead. "
+-                "brotlipy has the same import name as brotli, so keeping both "
+-                "installed is strongly discouraged."
+-            ),
+-            ScrapyDeprecationWarning,
+-        )
+-
+-        def _brotli_decompress(decompressor, data):
+-            return decompressor.decompress(data)
+-
+-    else:
+-
+-        def _brotli_decompress(decompressor, data):
+-            return decompressor.process(data)
+
++with contextlib.suppress(ImportError):
++    import brotli
+
+ with contextlib.suppress(ImportError):
+     import zstandard
+@@ -46,62 +13,64 @@ _CHUNK_SIZE = 65536  # 64 KiB
+
+
+ class _DecompressionMaxSizeExceeded(ValueError):
+-    pass
++    def __init__(self, decompressed_size: int, max_size: int) -> None:
++        self.decompressed_size = decompressed_size
++        self.max_size = max_size
++
++    def __str__(self) -> str:
++        return (
++            "The number of bytes decompressed so far "
++            f"({self.decompressed_size} B) exceeded the specified maximum "
++            f"({self.max_size} B)."
++        )
++
++
++def _check_max_size(decompressed_size: int, max_size: int) -> None:
++    if max_size and decompressed_size > max_size:
++        raise _DecompressionMaxSizeExceeded(decompressed_size, max_size)
+
+
+ def _inflate(data: bytes, *, max_size: int = 0) -> bytes:
+     decompressor = zlib.decompressobj()
+-    raw_decompressor = zlib.decompressobj(wbits=-15)
+-    input_stream = BytesIO(data)
++    try:
++        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
++    except zlib.error:
++        # to work with raw deflate content that may be sent by Microsoft servers.
++        decompressor = zlib.decompressobj(wbits=-15)
++        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
++    decompressed_size = len(first_chunk)
++    _check_max_size(decompressed_size, max_size)
+     output_stream = BytesIO()
+-    output_chunk = b"."
+-    decompressed_size = 0
+-    while output_chunk:
+-        input_chunk = input_stream.read(_CHUNK_SIZE)
+-        try:
+-            output_chunk = decompressor.decompress(input_chunk)
+-        except zlib.error:
+-            if decompressor != raw_decompressor:
+-                # ugly hack to work with raw deflate content that may
+-                # be sent by microsoft servers. For more information, see:
+-                # http://carsten.codimi.de/gzip.yaws/
+-                # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
+-                # http://www.gzip.org/zlib/zlib_faq.html#faq38
+-                decompressor = raw_decompressor
+-                output_chunk = decompressor.decompress(input_chunk)
+-            else:
+-                raise
++    output_stream.write(first_chunk)
++    while decompressor.unconsumed_tail:
++        output_chunk = decompressor.decompress(
++            decompressor.unconsumed_tail, max_length=_CHUNK_SIZE
++        )
+         decompressed_size += len(output_chunk)
+-        if max_size and decompressed_size > max_size:
+-            raise _DecompressionMaxSizeExceeded(
+-                f"The number of bytes decompressed so far "
+-                f"({decompressed_size} B) exceed the specified maximum "
+-                f"({max_size} B)."
+-            )
++        _check_max_size(decompressed_size, max_size)
+         output_stream.write(output_chunk)
+-    output_stream.seek(0)
+-    return output_stream.read()
++    if tail := decompressor.flush():
++        decompressed_size += len(tail)
++        _check_max_size(decompressed_size, max_size)
++        output_stream.write(tail)
++    return output_stream.getvalue()
+
+
+ def _unbrotli(data: bytes, *, max_size: int = 0) -> bytes:
+     decompressor = brotli.Decompressor()
+-    input_stream = BytesIO(data)
++    first_chunk = decompressor.process(data, output_buffer_limit=_CHUNK_SIZE)
++    decompressed_size = len(first_chunk)
++    _check_max_size(decompressed_size, max_size)
+     output_stream = BytesIO()
+-    output_chunk = b"."
+-    decompressed_size = 0
+-    while output_chunk:
+-        input_chunk = input_stream.read(_CHUNK_SIZE)
+-        output_chunk = _brotli_decompress(decompressor, input_chunk)
++    output_stream.write(first_chunk)
++    while not decompressor.is_finished():
++        output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
++        if not output_chunk:
++            raise ValueError("Truncated brotli compressed data")
+         decompressed_size += len(output_chunk)
+-        if max_size and decompressed_size > max_size:
+-            raise _DecompressionMaxSizeExceeded(
+-                f"The number of bytes decompressed so far "
+-                f"({decompressed_size} B) exceed the specified maximum "
+-                f"({max_size} B)."
+-            )
++        _check_max_size(decompressed_size, max_size)
+         output_stream.write(output_chunk)
+-    output_stream.seek(0)
+-    return output_stream.read()
++    return output_stream.getvalue()
+
+
+ def _unzstd(data: bytes, *, max_size: int = 0) -> bytes:
+@@ -113,12 +82,6 @@ def _unzstd(data: bytes, *, max_size: in
+     while output_chunk:
+         output_chunk = stream_reader.read(_CHUNK_SIZE)
+         decompressed_size += len(output_chunk)
+-        if max_size and decompressed_size > max_size:
+-            raise _DecompressionMaxSizeExceeded(
+-                f"The number of bytes decompressed so far "
+-                f"({decompressed_size} B) exceed the specified maximum "
+-                f"({max_size} B)."
+-            )
++        _check_max_size(decompressed_size, max_size)
+         output_stream.write(output_chunk)
+-    output_stream.seek(0)
+-    return output_stream.read()
++    return output_stream.getvalue()
+Index: scrapy-2.13.3/scrapy/utils/gz.py
+===================================================================
+--- scrapy-2.13.3.orig/scrapy/utils/gz.py
++++ scrapy-2.13.3/scrapy/utils/gz.py
+@@ -5,7 +5,7 @@ from gzip import GzipFile
+ from io import BytesIO
+ from typing import TYPE_CHECKING
+
+-from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded
++from ._compression import _CHUNK_SIZE, _check_max_size
+
+ if TYPE_CHECKING:
+     from scrapy.http import Response
+@@ -31,15 +31,9 @@ def gunzip(data: bytes, *, max_size: int
+                 break
+             raise
+         decompressed_size += len(chunk)
+-        if max_size and decompressed_size > max_size:
+-            raise _DecompressionMaxSizeExceeded(
+-                f"The number of bytes decompressed so far "
+-                f"({decompressed_size} B) exceed the specified maximum "
+-                f"({max_size} B)."
+-            )
++        _check_max_size(decompressed_size, max_size)
+         output_stream.write(chunk)
+-    output_stream.seek(0)
+-    return output_stream.read()
++    return output_stream.getvalue()
+
+
+ def gzip_magic_number(response: Response) -> bool:
+Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
+===================================================================
+--- scrapy-2.13.3.orig/tests/test_downloadermiddleware_httpcompression.py
++++ scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
+@@ -2,7 +2,6 @@ from gzip import GzipFile
+ from io import BytesIO
+ from logging import WARNING
+ from pathlib import Path
+-from unittest import SkipTest
+
+ import pytest
+ from testfixtures import LogCapture
+@@ -48,9 +47,26 @@ FORMAT = {
+         "zstd",  # 1 096 → 11 511 612
+         )
+     },
++    "bomb-br-64GiB": ("bomb-br-64GiB.bin", "br"),  # 51K → 64 GiB decompression bomb
+ }
+
+
++def _skip_if_no_br() -> None:
++    try:
++        import brotli  # noqa: PLC0415
++
++        brotli.Decompressor.can_accept_more_data
++    except (ImportError, AttributeError):
++        pytest.skip("no brotli support")
++
++
++def _skip_if_no_zstd() -> None:
++    try:
++        import zstandard  # noqa: F401,PLC0415
++    except ImportError:
++        pytest.skip("no zstd support (zstandard)")
++
++
+ class TestHttpCompression:
+     def setup_method(self):
+         self.crawler = get_crawler(Spider)
+@@ -124,13 +140,8 @@ class TestHttpCompression:
+         self.assertStatsEqual("httpcompression/response_bytes", 74837)
+
+     def test_process_response_br(self):
+-        try:
+-            try:
+-                import brotli  # noqa: F401
+-            except ImportError:
+-                import brotlicffi  # noqa: F401
+-        except ImportError:
+-            raise SkipTest("no brotli")
++        _skip_if_no_br()
++
+         response = self._getresponse("br")
+         request = response.request
+         assert response.headers["Content-Encoding"] == b"br"
+@@ -143,14 +154,9 @@ class TestHttpCompression:
+
+     def test_process_response_br_unsupported(self):
+         try:
+-            try:
+-                import brotli  # noqa: F401
+-
+-                raise SkipTest("Requires not having brotli support")
+-            except ImportError:
+-                import brotlicffi  # noqa: F401
++            import brotli  # noqa: F401,PLC0415
+
+-                raise SkipTest("Requires not having brotli support")
++            pytest.skip("Requires not having brotli support")
+         except ImportError:
+             pass
+         response = self._getresponse("br")
+@@ -169,7 +175,7 @@ class TestHttpCompression:
+                 (
+                     "HttpCompressionMiddleware cannot decode the response for"
+                     " http://scrapytest.org/ from unsupported encoding(s) 'br'."
+-                    " You need to install brotli or brotlicffi to decode 'br'."
++ " You need to install brotli >= 1.2.0 to decode 'br'." + ), + ), + ) +@@ -177,10 +183,8 @@ class TestHttpCompression: + assert newresponse.headers.getlist("Content-Encoding") == [b"br"] + + def test_process_response_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + raw_content = None + for check_key in FORMAT: + if not check_key.startswith("zstd-"): +@@ -199,9 +203,9 @@ class TestHttpCompression: + + def test_process_response_zstd_unsupported(self): + try: +- import zstandard # noqa: F401 ++ import zstandard # noqa: F401,PLC0415 + +- raise SkipTest("Requires not having zstandard support") ++ pytest.skip("Requires not having zstandard support") + except ImportError: + pass + response = self._getresponse("zstd-static-content-size") +@@ -503,24 +507,20 @@ class TestHttpCompression: + self.assertStatsEqual("httpcompression/response_bytes", None) + + def _test_compression_bomb_setting(self, compression_id): +- settings = {"DOWNLOAD_MAXSIZE": 10_000_000} ++ settings = {"DOWNLOAD_MAXSIZE": 1_000_000} + crawler = get_crawler(Spider, settings_dict=settings) + spider = crawler._create_spider("scrapytest.org") + mw = HttpCompressionMiddleware.from_crawler(crawler) + mw.open_spider(spider) + +- response = self._getresponse(f"bomb-{compression_id}") +- with pytest.raises(IgnoreRequest): +- mw.process_response(response.request, response, spider) ++ response = self._getresponse(f"bomb-{compression_id}") # 11_511_612 B ++ with pytest.raises(IgnoreRequest) as exc_info: ++ mw.process_response(response.request, response, self.spider) ++ assert exc_info.value.__cause__.decompressed_size < 1_100_000 + + def test_compression_bomb_setting_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_compression_bomb_setting("br") + + def test_compression_bomb_setting_deflate(self): +@@ -530,15 +530,13 @@ class TestHttpCompression: + self._test_compression_bomb_setting("gzip") + + def test_compression_bomb_setting_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_compression_bomb_setting("zstd") + + def _test_compression_bomb_spider_attr(self, compression_id): + class DownloadMaxSizeSpider(Spider): +- download_maxsize = 10_000_000 ++ download_maxsize = 1_000_000 + + crawler = get_crawler(DownloadMaxSizeSpider) + spider = crawler._create_spider("scrapytest.org") +@@ -546,30 +544,28 @@ class TestHttpCompression: + mw.open_spider(spider) + + response = self._getresponse(f"bomb-{compression_id}") +- with pytest.raises(IgnoreRequest): +- mw.process_response(response.request, response, spider) ++ with pytest.raises(IgnoreRequest) as exc_info: ++ mw.process_response(response.request, response, self.spider) ++ assert exc_info.value.__cause__.decompressed_size < 1_100_000 + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_compression_bomb_spider_attr_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_compression_bomb_spider_attr("br") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_compression_bomb_spider_attr_deflate(self): 
+ self._test_compression_bomb_spider_attr("deflate") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_compression_bomb_spider_attr_gzip(self): + self._test_compression_bomb_spider_attr("gzip") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_compression_bomb_spider_attr_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_compression_bomb_spider_attr("zstd") + + def _test_compression_bomb_request_meta(self, compression_id): +@@ -579,18 +575,14 @@ class TestHttpCompression: + mw.open_spider(spider) + + response = self._getresponse(f"bomb-{compression_id}") +- response.meta["download_maxsize"] = 10_000_000 +- with pytest.raises(IgnoreRequest): +- mw.process_response(response.request, response, spider) ++ response.meta["download_maxsize"] = 1_000_000 ++ with pytest.raises(IgnoreRequest) as exc_info: ++ mw.process_response(response.request, response, self.spider) ++ assert exc_info.value.__cause__.decompressed_size < 1_100_000 + + def test_compression_bomb_request_meta_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_compression_bomb_request_meta("br") + + def test_compression_bomb_request_meta_deflate(self): +@@ -600,12 +592,38 @@ class TestHttpCompression: + self._test_compression_bomb_request_meta("gzip") + + def test_compression_bomb_request_meta_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_compression_bomb_request_meta("zstd") + ++ def test_compression_bomb_output_buffer_limit(self): ++ """Test that the 64 GiB brotli decompression bomb is properly handled. ++ ++ This test ensures that the output_buffer_limit parameter in the brotli ++ decompressor prevents the decompression bomb attack. The bomb file is ++ approximately 51 KB compressed but would decompress to 64 GiB, which ++ should trigger IgnoreRequest when DOWNLOAD_MAXSIZE is exceeded. 
++ """ ++ _skip_if_no_br() ++ ++ settings = {"DOWNLOAD_MAXSIZE": 10_000_000} # 10 MB limit ++ crawler = get_crawler(Spider, settings_dict=settings) ++ spider = crawler._create_spider("scrapytest.org") ++ mw = HttpCompressionMiddleware.from_crawler(crawler) ++ mw.open_spider(spider) ++ ++ response = self._getresponse("bomb-br-64GiB") ++ ++ # Verify the response is properly configured ++ assert response.headers["Content-Encoding"] == b"br" ++ ++ # The middleware should raise IgnoreRequest due to exceeding DOWNLOAD_MAXSIZE ++ with pytest.raises(IgnoreRequest) as exc_info: ++ mw.process_response(response.request, response, self.spider) ++ ++ # Verify the exception message mentions the download size limits ++ assert "exceeded DOWNLOAD_MAXSIZE (10000000 B)" in str(exc_info.value) ++ + def _test_download_warnsize_setting(self, compression_id): + settings = {"DOWNLOAD_WARNSIZE": 10_000_000} + crawler = get_crawler(Spider, settings_dict=settings) +@@ -619,7 +637,7 @@ class TestHttpCompression: + propagate=False, + level=WARNING, + ) as log: +- mw.process_response(response.request, response, spider) ++ mw.process_response(response.request, response, self.spider) + log.check( + ( + "scrapy.downloadermiddlewares.httpcompression", +@@ -633,13 +651,8 @@ class TestHttpCompression: + ) + + def test_download_warnsize_setting_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_download_warnsize_setting("br") + + def test_download_warnsize_setting_deflate(self): +@@ -649,10 +662,8 @@ class TestHttpCompression: + self._test_download_warnsize_setting("gzip") + + def test_download_warnsize_setting_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_download_warnsize_setting("zstd") + + def _test_download_warnsize_spider_attr(self, compression_id): +@@ -670,7 +681,7 @@ class TestHttpCompression: + propagate=False, + level=WARNING, + ) as log: +- mw.process_response(response.request, response, spider) ++ mw.process_response(response.request, response, self.spider) + log.check( + ( + "scrapy.downloadermiddlewares.httpcompression", +@@ -683,27 +694,24 @@ class TestHttpCompression: + ), + ) + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_download_warnsize_spider_attr_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_download_warnsize_spider_attr("br") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_download_warnsize_spider_attr_deflate(self): + self._test_download_warnsize_spider_attr("deflate") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_download_warnsize_spider_attr_gzip(self): + self._test_download_warnsize_spider_attr("gzip") + ++ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning") + def test_download_warnsize_spider_attr_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_download_warnsize_spider_attr("zstd") + + def _test_download_warnsize_request_meta(self, compression_id): +@@ -719,7 +727,7 @@ class 
TestHttpCompression: + propagate=False, + level=WARNING, + ) as log: +- mw.process_response(response.request, response, spider) ++ mw.process_response(response.request, response, self.spider) + log.check( + ( + "scrapy.downloadermiddlewares.httpcompression", +@@ -733,13 +741,8 @@ class TestHttpCompression: + ) + + def test_download_warnsize_request_meta_br(self): +- try: +- try: +- import brotli # noqa: F401 +- except ImportError: +- import brotlicffi # noqa: F401 +- except ImportError: +- raise SkipTest("no brotli") ++ _skip_if_no_br() ++ + self._test_download_warnsize_request_meta("br") + + def test_download_warnsize_request_meta_deflate(self): +@@ -749,8 +752,6 @@ class TestHttpCompression: + self._test_download_warnsize_request_meta("gzip") + + def test_download_warnsize_request_meta_zstd(self): +- try: +- import zstandard # noqa: F401 +- except ImportError: +- raise SkipTest("no zstd support (zstandard)") ++ _skip_if_no_zstd() ++ + self._test_download_warnsize_request_meta("zstd") +Index: scrapy-2.13.3/tox.ini +=================================================================== +--- scrapy-2.13.3.orig/tox.ini ++++ scrapy-2.13.3/tox.ini +@@ -141,8 +141,7 @@ deps = + Twisted[http2] + boto3 + bpython # optional for shell wrapper tests +- brotli; implementation_name != "pypy" # optional for HTTP compress downloader middleware tests +- brotlicffi; implementation_name == "pypy" # optional for HTTP compress downloader middleware tests ++ brotli >= 1.2.0 # optional for HTTP compress downloader middleware tests + google-cloud-storage + ipython + robotexclusionrulesparser +@@ -156,9 +155,7 @@ deps = + Pillow==8.0.0 + boto3==1.20.0 + bpython==0.7.1 +- brotli==0.5.2; implementation_name != "pypy" +- brotlicffi==0.8.0; implementation_name == "pypy" +- brotlipy ++ brotli==1.2.0 + google-cloud-storage==1.29.0 + ipython==2.0.0 + robotexclusionrulesparser==1.6.2 diff --git a/_multibuild b/_multibuild new file mode 100644 index 0000000..fcc7b97 --- /dev/null +++ b/_multibuild @@ -0,0 +1,3 @@ + + test + diff --git a/python-Scrapy.changes b/python-Scrapy.changes index bcfbd93..b4a68aa 100644 --- a/python-Scrapy.changes +++ b/python-Scrapy.changes @@ -1,3 +1,14 @@ +------------------------------------------------------------------- +Wed Nov 12 12:28:41 UTC 2025 - Daniel Garcia + +- Use libalternatives +- Use multibuild to run tests in a subpackage +- add upstream patch CVE-2025-6176.patch to mitigate brotli and + deflate decompression bombs DoS. 
+  This patch requires a new binary test file, added as a new
+  source: CVE-2025-6176-testfile-bomb-br-64GiB.bin
+  (gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)
+
 -------------------------------------------------------------------
 Thu Jul 31 05:18:40 UTC 2025 - Steve Kowalik
 
diff --git a/python-Scrapy.spec b/python-Scrapy.spec
index 8a939f1..8ee17ee 100644
--- a/python-Scrapy.spec
+++ b/python-Scrapy.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package python-Scrapy
 #
-# Copyright (c) 2025 SUSE LLC
+# Copyright (c) 2025 SUSE LLC and contributors
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -16,37 +16,60 @@
 #
 
 
+%global flavor @BUILD_FLAVOR@%{nil}
+%if "%{flavor}" == "test"
+%define psuffix -test
+%bcond_without test
+%endif
+%if "%{flavor}" == ""
+%define psuffix %{nil}
+%bcond_with test
+%endif
+%if 0%{?suse_version} > 1500
+%bcond_without libalternatives
+%else
+%bcond_with libalternatives
+%endif
 %{?sle15_python_module_pythons}
-Name:           python-Scrapy
+Name:           python-Scrapy%{?psuffix}
 Version:        2.13.3
 Release:        0
 Summary:        A high-level Python Screen Scraping framework
 License:        BSD-3-Clause
 URL:            https://scrapy.org
 Source:         https://files.pythonhosted.org/packages/source/s/scrapy/scrapy-%{version}.tar.gz
+# New test file added in gh#scrapy/scrapy#7134, needed for Patch2,
+# related to CVE-2025-6176
Source1:        CVE-2025-6176-testfile-bomb-br-64GiB.bin
 # PATCH-FIX-UPSTREAM gh#scrapy/scrapy#6922
 Patch0:         remove-hoverxref.patch
 # PATCH-FIX-OPENSUSE No sphinx-rtd-dark-mode
 Patch1:         no-dark-mode.patch
+# PATCH-FIX-UPSTREAM CVE-2025-6176.patch gh#scrapy/scrapy#7134
+Patch2:         CVE-2025-6176.patch
+BuildRequires:  %{python_module base >= 3.9}
+BuildRequires:  %{python_module hatchling}
+BuildRequires:  %{python_module pip}
+BuildRequires:  %{python_module wheel}
+%if %{with test}
+# Test requirements:
+BuildRequires:  %{python_module Scrapy = %{version}}
 BuildRequires:  %{python_module Brotli}
 BuildRequires:  %{python_module Pillow}
 BuildRequires:  %{python_module Protego}
 BuildRequires:  %{python_module PyDispatcher >= 2.0.5}
 BuildRequires:  %{python_module Twisted >= 18.9.0}
 BuildRequires:  %{python_module attrs}
-BuildRequires:  %{python_module base >= 3.9}
 BuildRequires:  %{python_module botocore >= 1.4.87}
 BuildRequires:  %{python_module cryptography >= 36.0.0}
 BuildRequires:  %{python_module cssselect >= 0.9.1}
 BuildRequires:  %{python_module dbm}
 BuildRequires:  %{python_module defusedxml >= 0.7.1}
-BuildRequires:  %{python_module hatchling}
 BuildRequires:  %{python_module itemadapter >= 0.1.0}
 BuildRequires:  %{python_module itemloaders >= 1.0.1}
 BuildRequires:  %{python_module lxml >= 4.4.1}
 BuildRequires:  %{python_module parsel >= 1.5.0}
 BuildRequires:  %{python_module pexpect >= 4.8.1}
-BuildRequires:  %{python_module pip}
 BuildRequires:  %{python_module pyOpenSSL >= 21.0.0}
 BuildRequires:  %{python_module pyftpdlib >= 1.5.8}
 BuildRequires:  %{python_module pytest-xdist}
@@ -59,6 +82,7 @@ BuildRequires:  %{python_module tldextract}
 BuildRequires:  %{python_module uvloop}
 BuildRequires:  %{python_module w3lib >= 1.17.0}
 BuildRequires:  %{python_module zope.interface >= 5.1.0}
+%endif
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
 BuildRequires:  python3-Sphinx
@@ -81,9 +105,14 @@ Requires:       python-service_identity >= 18.1.0
 Requires:       python-tldextract
 Requires:       python-w3lib >= 1.17.2
 Requires:       python-zope.interface >= 5.1.0
+BuildArch:      noarch
+%if %{with libalternatives}
+BuildRequires:  alts
+Requires:       alts
+%else
 Requires(post): update-alternatives
 Requires(postun): update-alternatives
-BuildArch:      noarch
+%endif
 %python_subpackages
 
 %description
@@ -102,6 +131,7 @@ Provides documentation for %{name}.
 
 sed -i -e 's:= python:= python3:g' docs/Makefile
 
+%if %{without test}
 %build
 %pyproject_wheel
 pushd docs
@@ -112,8 +142,12 @@ popd
 %pyproject_install
 %python_clone -a %{buildroot}%{_bindir}/scrapy
 %python_expand %fdupes %{buildroot}%{$python_sitelib}
+%endif
 
+%if %{with test}
 %check
+cp %{SOURCE1} tests/sample_data/compressed/bomb-br-64GiB.bin
+
 # no color in obs chroot console
 skiplist="test_pformat"
 # no online connection to toscrapy.com
@@ -126,6 +160,12 @@ skiplist="$skiplist or test_queue_push_pop_priorities"
     -k "not (${skiplist})" \
     -W ignore::DeprecationWarning \
     tests}
+%endif
+
+%if %{without test}
+%pre
+# If libalternatives is used: Removing old update-alternatives entries.
+%python_libalternatives_reset_alternative scrapy
 
 %post
 %python_install_alternative scrapy
@@ -142,5 +182,6 @@ skiplist="$skiplist or test_queue_push_pop_priorities"
 
 %files -n %{name}-doc
 %doc docs/build/html
+%endif
 
 %changelog
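
Note on the mitigation pattern: every codec touched by CVE-2025-6176.patch follows the same shape, decompressing in bounded chunks and checking a running total against the limit instead of inflating the whole body in one call. The following is a minimal standalone sketch of that shape using only stdlib zlib; the helper name and limit values are illustrative and not taken from the patch.

import zlib

_CHUNK_SIZE = 65536  # 64 KiB, the chunk size used in scrapy/utils/_compression.py


def bounded_inflate(data: bytes, max_size: int = 0) -> bytes:
    # Hypothetical helper for illustration only; not part of the patch.
    decompressor = zlib.decompressobj()
    output = bytearray()
    # max_length bounds how much a single call may emit, so a tiny
    # compressed payload cannot force one huge allocation.
    chunk = decompressor.decompress(data, _CHUNK_SIZE)
    while chunk:
        output += chunk
        if max_size and len(output) > max_size:
            raise ValueError(f"decompressed {len(output)} B, limit {max_size} B")
        chunk = decompressor.decompress(decompressor.unconsumed_tail, _CHUNK_SIZE)
    output += decompressor.flush()
    if max_size and len(output) > max_size:
        raise ValueError(f"decompressed {len(output)} B, limit {max_size} B")
    return bytes(output)


# ~10 KiB of compressed zeros expand to 10 MiB; with a 1 MB cap the loop
# aborts after roughly 16 chunks instead of materializing the full payload.
bomb = zlib.compress(b"\x00" * (10 * 1024 * 1024))
try:
    bounded_inflate(bomb, max_size=1_000_000)
except ValueError as err:
    print(err)

The brotli path in the patch additionally relies on the output_buffer_limit argument and the can_accept_more_data/is_finished() API, which, per the patch's own version check and warning text, requires brotli >= 1.2.0; that is why ACCEPTED_ENCODINGS only advertises b"br" when that attribute is present.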