forked from pool/python-Scrapy

Accepting request 1318232 from devel:languages:python

- Update CVE-2025-6176.patch to reflect the latest upstream changes to
  the patch.
- Remove the CVE-2025-6176-testfile-bomb-br-64GiB.bin source; it is no
  longer needed.
  (gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)

OBS-URL: https://build.opensuse.org/request/show/1318232
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-Scrapy?expand=0&rev=26
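The refreshed patch tracks upstream's approach of draining brotli output in bounded chunks rather than shipping a 64 GiB test bomb: with brotli >= 1.2.0, Decompressor.process() accepts an output_buffer_limit, so the decompressed size can be checked after every chunk and oversized responses aborted early. A minimal sketch of that pattern, assuming brotli >= 1.2.0 (the Decompressor() construction, the _CHUNK_SIZE value, and the error type are illustrative here, not the exact packaged code):

    # Bounded brotli decompression: drain output in fixed-size chunks so a
    # ~51 KiB bomb cannot balloon to 64 GiB in a single call.
    import brotli

    _CHUNK_SIZE = 64 * 1024  # illustrative chunk size


    def decompress_bounded(data: bytes, max_size: int) -> bytes:
        decompressor = brotli.Decompressor()  # assumed constructor for this sketch
        chunks = []
        decompressed_size = 0
        # The first call feeds the compressed input; later calls drain output.
        output_chunk = decompressor.process(data, output_buffer_limit=_CHUNK_SIZE)
        while True:
            decompressed_size += len(output_chunk)
            if max_size and decompressed_size > max_size:
                # Scrapy's middleware turns this condition into an IgnoreRequest.
                raise ValueError(f"decompressed size exceeded {max_size} B")
            chunks.append(output_chunk)
            if decompressor.is_finished():
                break
            output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
            if not output_chunk:  # truncated input: keep what was decoded
                break
        return b"".join(chunks)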
.gitattributes (vendored):

@@ -21,5 +21,3 @@
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-## Specific LFS patterns
-CVE-2025-6176-testfile-bomb-br-64GiB.bin filter=lfs diff=lfs merge=lfs -text
CVE-2025-6176-testfile-bomb-br-64GiB.bin (deleted Git LFS pointer):

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5d5b6139298c899595f784cdd36ff195dbdd479504c4a48d2a8e0a43d2e7a03d
-size 51713
CVE-2025-6176.patch:

@@ -1,16 +1,36 @@
-From e3673d5a42cdd8be95c09982240317af1410fea3 Mon Sep 17 00:00:00 2001
-From: Rui Xi <Cycloctane@outlook.com>
-Date: Thu, 6 Nov 2025 18:53:35 +0800
-Subject: [PATCH 01/18] mitigate brotli decompression bomb
-
-drop brotlicffi
----
- .../downloadermiddlewares/httpcompression.py  | 10 +--
- scrapy/utils/_compression.py                  | 75 +++++--------------
- scrapy/utils/gz.py                            |  9 +--
- ...st_downloadermiddleware_httpcompression.py | 16 +---
- 4 files changed, 29 insertions(+), 81 deletions(-)
-
+Index: scrapy-2.13.3/conftest.py
+===================================================================
+--- scrapy-2.13.3.orig/conftest.py
++++ scrapy-2.13.3/conftest.py
+@@ -116,6 +116,16 @@ def requires_boto3(request):
+         pytest.skip("boto3 is not installed")
+ 
+ 
++@pytest.fixture(autouse=True)
++def requires_mitmproxy(request):
++    if not request.node.get_closest_marker("requires_mitmproxy"):
++        return
++    try:
++        import mitmproxy  # noqa: F401, PLC0415
++    except ImportError:
++        pytest.skip("mitmproxy is not installed")
++
++
+ def pytest_configure(config):
+     if config.getoption("--reactor") != "default":
+         install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
+Index: scrapy-2.13.3/pyproject.toml
+===================================================================
+--- scrapy-2.13.3.orig/pyproject.toml
++++ scrapy-2.13.3/pyproject.toml
+@@ -242,6 +242,7 @@ markers = [
+     "requires_uvloop: marks tests as only enabled when uvloop is known to be working",
+     "requires_botocore: marks tests that need botocore (but not boto3)",
+     "requires_boto3: marks tests that need botocore and boto3",
++    "requires_mitmproxy: marks tests that need mitmproxy",
+ ]
+ filterwarnings = [
+     "ignore::DeprecationWarning:twisted.web.static"
 Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
 ===================================================================
 --- scrapy-2.13.3.orig/scrapy/downloadermiddlewares/httpcompression.py
@@ -30,7 +50,7 @@ Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
 -    ACCEPTED_ENCODINGS.append(b"br")
 +    try:
 +        brotli.Decompressor.can_accept_more_data
-+    except AttributeError:
++    except AttributeError:  # pragma: no cover
 +        warnings.warn(
 +            "You have brotli installed. But 'br' encoding support now requires "
 +            "brotli version >= 1.2.0. Please upgrade brotli version to make Scrapy "
@@ -129,7 +149,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +
 +    def __str__(self) -> str:
 +        return (
-+            "The number of bytes decompressed so far "
++            f"The number of bytes decompressed so far "
 +            f"({self.decompressed_size} B) exceeded the specified maximum "
 +            f"({self.max_size} B)."
 +        )
@@ -147,7 +167,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +    try:
 +        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
 +    except zlib.error:
-+        # to work with raw deflate content that may sent by microsoft servers.
++        # to work with raw deflate content that may be sent by microsoft servers.
 +        decompressor = zlib.decompressobj(wbits=-15)
 +        first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
 +    decompressed_size = len(first_chunk)
@@ -209,7 +229,7 @@ Index: scrapy-2.13.3/scrapy/utils/_compression.py
 +    while not decompressor.is_finished():
 +        output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
 +        if not output_chunk:
-+            raise ValueError("Truncated brotli compressed data")
++            break
          decompressed_size += len(output_chunk)
 -        if max_size and decompressed_size > max_size:
 -            raise _DecompressionMaxSizeExceeded(
@@ -275,19 +295,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 ===================================================================
 --- scrapy-2.13.3.orig/tests/test_downloadermiddleware_httpcompression.py
 +++ scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
-@@ -2,7 +2,6 @@ from gzip import GzipFile
- from io import BytesIO
- from logging import WARNING
- from pathlib import Path
--from unittest import SkipTest
- 
- import pytest
- from testfixtures import LogCapture
-@@ -48,9 +47,26 @@ FORMAT = {
-             "zstd",  # 1 096 → 11 511 612
-         )
-     },
-+    "bomb-br-64GiB": ("bomb-br-64GiB.bin", "br"),  # 51K → 64 GiB decompression bomb
+@@ -51,6 +51,22 @@ FORMAT = {
  }
  
  
@@ -310,7 +318,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
  class TestHttpCompression:
      def setup_method(self):
          self.crawler = get_crawler(Spider)
-@@ -124,13 +140,8 @@ class TestHttpCompression:
+@@ -124,13 +140,7 @@ class TestHttpCompression:
          self.assertStatsEqual("httpcompression/response_bytes", 74837)
  
      def test_process_response_br(self):
@@ -322,11 +330,10 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          response = self._getresponse("br")
          request = response.request
          assert response.headers["Content-Encoding"] == b"br"
-@@ -143,14 +154,9 @@ class TestHttpCompression:
+@@ -143,14 +153,8 @@ class TestHttpCompression:
  
      def test_process_response_br_unsupported(self):
          try:
@@ -336,14 +343,14 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -                raise SkipTest("Requires not having brotli support")
 -            except ImportError:
 -                import brotlicffi  # noqa: F401
-+            import brotli  # noqa: F401,PLC0415
+-
  
 -                raise SkipTest("Requires not having brotli support")
++            import brotli  # noqa: F401,PLC0415
 +            pytest.skip("Requires not having brotli support")
          except ImportError:
              pass
          response = self._getresponse("br")
@@ -352,32 +359,7 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
              ),
          ),
      )
-@@ -177,10 +183,8 @@ class TestHttpCompression:
-         assert newresponse.headers.getlist("Content-Encoding") == [b"br"]
- 
-     def test_process_response_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         raw_content = None
-         for check_key in FORMAT:
-             if not check_key.startswith("zstd-"):
-@@ -199,9 +203,9 @@ class TestHttpCompression:
- 
-     def test_process_response_zstd_unsupported(self):
-         try:
--            import zstandard  # noqa: F401
-+            import zstandard  # noqa: F401,PLC0415
- 
--            raise SkipTest("Requires not having zstandard support")
-+            pytest.skip("Requires not having zstandard support")
-         except ImportError:
-             pass
-         response = self._getresponse("zstd-static-content-size")
-@@ -503,24 +507,20 @@ class TestHttpCompression:
+@@ -503,24 +507,19 @@ class TestHttpCompression:
          self.assertStatsEqual("httpcompression/response_bytes", None)
  
      def _test_compression_bomb_setting(self, compression_id):
@@ -390,10 +372,9 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        response = self._getresponse(f"bomb-{compression_id}")
 -        with pytest.raises(IgnoreRequest):
--            mw.process_response(response.request, response, spider)
 +        response = self._getresponse(f"bomb-{compression_id}")  # 11_511_612 B
 +        with pytest.raises(IgnoreRequest) as exc_info:
-+            mw.process_response(response.request, response, self.spider)
+             mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
  
      def test_compression_bomb_setting_br(self):
@@ -405,21 +386,10 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_compression_bomb_setting("br")
  
      def test_compression_bomb_setting_deflate(self):
-@@ -530,15 +530,13 @@ class TestHttpCompression:
-         self._test_compression_bomb_setting("gzip")
+@@ -538,7 +537,7 @@ class TestHttpCompression:
  
-     def test_compression_bomb_setting_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_compression_bomb_setting("zstd")
- 
      def _test_compression_bomb_spider_attr(self, compression_id):
          class DownloadMaxSizeSpider(Spider):
@@ -428,17 +398,15 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
          crawler = get_crawler(DownloadMaxSizeSpider)
          spider = crawler._create_spider("scrapytest.org")
-@@ -546,30 +544,28 @@ class TestHttpCompression:
+@@ -546,17 +545,12 @@ class TestHttpCompression:
          mw.open_spider(spider)
  
          response = self._getresponse(f"bomb-{compression_id}")
 -        with pytest.raises(IgnoreRequest):
--            mw.process_response(response.request, response, spider)
 +        with pytest.raises(IgnoreRequest) as exc_info:
-+            mw.process_response(response.request, response, self.spider)
+             mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
  
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_br(self):
 -        try:
 -            try:
@@ -448,38 +416,18 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_compression_bomb_spider_attr("br")
  
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_compression_bomb_spider_attr_deflate(self):
-         self._test_compression_bomb_spider_attr("deflate")
- 
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
-     def test_compression_bomb_spider_attr_gzip(self):
-         self._test_compression_bomb_spider_attr("gzip")
- 
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
-     def test_compression_bomb_spider_attr_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_compression_bomb_spider_attr("zstd")
- 
-     def _test_compression_bomb_request_meta(self, compression_id):
-@@ -579,18 +575,14 @@ class TestHttpCompression:
+@@ -579,18 +573,13 @@ class TestHttpCompression:
          mw.open_spider(spider)
  
          response = self._getresponse(f"bomb-{compression_id}")
 -        response.meta["download_maxsize"] = 10_000_000
 -        with pytest.raises(IgnoreRequest):
--            mw.process_response(response.request, response, spider)
 +        response.meta["download_maxsize"] = 1_000_000
 +        with pytest.raises(IgnoreRequest) as exc_info:
-+            mw.process_response(response.request, response, self.spider)
+             mw.process_response(response.request, response, spider)
 +        assert exc_info.value.__cause__.decompressed_size < 1_100_000
  
      def test_compression_bomb_request_meta_br(self):
@@ -491,63 +439,10 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_compression_bomb_request_meta("br")
  
      def test_compression_bomb_request_meta_deflate(self):
-@@ -600,12 +592,38 @@ class TestHttpCompression:
-         self._test_compression_bomb_request_meta("gzip")
- 
-     def test_compression_bomb_request_meta_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_compression_bomb_request_meta("zstd")
- 
-+    def test_compression_bomb_output_buffer_limit(self):
-+        """Test that the 64 GiB brotli decompression bomb is properly handled.
-+
-+        This test ensures that the output_buffer_limit parameter in the brotli
-+        decompressor prevents the decompression bomb attack. The bomb file is
-+        approximately 51 KB compressed but would decompress to 64 GiB, which
-+        should trigger IgnoreRequest when DOWNLOAD_MAXSIZE is exceeded.
-+        """
-+        _skip_if_no_br()
-+
-+        settings = {"DOWNLOAD_MAXSIZE": 10_000_000}  # 10 MB limit
-+        crawler = get_crawler(Spider, settings_dict=settings)
-+        spider = crawler._create_spider("scrapytest.org")
-+        mw = HttpCompressionMiddleware.from_crawler(crawler)
-+        mw.open_spider(spider)
-+
-+        response = self._getresponse("bomb-br-64GiB")
-+
-+        # Verify the response is properly configured
-+        assert response.headers["Content-Encoding"] == b"br"
-+
-+        # The middleware should raise IgnoreRequest due to exceeding DOWNLOAD_MAXSIZE
-+        with pytest.raises(IgnoreRequest) as exc_info:
-+            mw.process_response(response.request, response, self.spider)
-+
-+        # Verify the exception message mentions the download size limits
-+        assert "exceeded DOWNLOAD_MAXSIZE (10000000 B)" in str(exc_info.value)
-+
-     def _test_download_warnsize_setting(self, compression_id):
-         settings = {"DOWNLOAD_WARNSIZE": 10_000_000}
-         crawler = get_crawler(Spider, settings_dict=settings)
-@@ -619,7 +637,7 @@ class TestHttpCompression:
-             propagate=False,
-             level=WARNING,
-         ) as log:
--            mw.process_response(response.request, response, spider)
-+            mw.process_response(response.request, response, self.spider)
-         log.check(
-             (
-                 "scrapy.downloadermiddlewares.httpcompression",
-@@ -633,13 +651,8 @@ class TestHttpCompression:
+@@ -633,13 +622,7 @@ class TestHttpCompression:
          )
  
      def test_download_warnsize_setting_br(self):
@@ -559,37 +454,12 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_download_warnsize_setting("br")
  
      def test_download_warnsize_setting_deflate(self):
-@@ -649,10 +662,8 @@ class TestHttpCompression:
-         self._test_download_warnsize_setting("gzip")
- 
-     def test_download_warnsize_setting_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_download_warnsize_setting("zstd")
- 
-     def _test_download_warnsize_spider_attr(self, compression_id):
-@@ -670,7 +681,7 @@ class TestHttpCompression:
-             propagate=False,
-             level=WARNING,
-         ) as log:
--            mw.process_response(response.request, response, spider)
-+            mw.process_response(response.request, response, self.spider)
-         log.check(
-             (
-                 "scrapy.downloadermiddlewares.httpcompression",
-@@ -683,27 +694,24 @@ class TestHttpCompression:
-             ),
+@@ -684,13 +667,7 @@ class TestHttpCompression:
          )
  
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_download_warnsize_spider_attr_br(self):
 -        try:
 -            try:
@@ -599,38 +469,10 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_download_warnsize_spider_attr("br")
  
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
      def test_download_warnsize_spider_attr_deflate(self):
-         self._test_download_warnsize_spider_attr("deflate")
- 
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
-     def test_download_warnsize_spider_attr_gzip(self):
-         self._test_download_warnsize_spider_attr("gzip")
- 
-+    @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
-     def test_download_warnsize_spider_attr_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
-         self._test_download_warnsize_spider_attr("zstd")
- 
-     def _test_download_warnsize_request_meta(self, compression_id):
-@@ -719,7 +727,7 @@ class TestHttpCompression:
-             propagate=False,
-             level=WARNING,
-         ) as log:
--            mw.process_response(response.request, response, spider)
-+            mw.process_response(response.request, response, self.spider)
-         log.check(
-             (
-                 "scrapy.downloadermiddlewares.httpcompression",
-@@ -733,13 +741,8 @@ class TestHttpCompression:
+@@ -733,13 +710,7 @@ class TestHttpCompression:
          )
  
      def test_download_warnsize_request_meta_br(self):
@@ -642,26 +484,85 @@ Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
 -        except ImportError:
 -            raise SkipTest("no brotli")
 +        _skip_if_no_br()
-+
          self._test_download_warnsize_request_meta("br")
  
      def test_download_warnsize_request_meta_deflate(self):
-@@ -749,8 +752,6 @@ class TestHttpCompression:
-         self._test_download_warnsize_request_meta("gzip")
- 
-     def test_download_warnsize_request_meta_zstd(self):
--        try:
--            import zstandard  # noqa: F401
--        except ImportError:
--            raise SkipTest("no zstd support (zstandard)")
-+        _skip_if_no_zstd()
-+
+@@ -754,3 +725,34 @@ class TestHttpCompression:
+         except ImportError:
+             raise SkipTest("no zstd support (zstandard)")
          self._test_download_warnsize_request_meta("zstd")
++
++    def _get_truncated_response(self, compression_id):
++        crawler = get_crawler(Spider)
++        spider = crawler._create_spider("scrapytest.org")
++        mw = HttpCompressionMiddleware.from_crawler(crawler)
++        mw.open_spider(spider)
++        response = self._getresponse(compression_id)
++        truncated_body = response.body[: len(response.body) // 2]
++        response = response.replace(body=truncated_body)
++        return mw.process_response(response.request, response, spider)
++
++    def test_process_truncated_response_br(self):
++        _skip_if_no_br()
++        resp = self._get_truncated_response("br")
++        assert resp.body.startswith(b"<!DOCTYPE")
++
++    def test_process_truncated_response_zlibdeflate(self):
++        resp = self._get_truncated_response("zlibdeflate")
++        assert resp.body.startswith(b"<!DOCTYPE")
++
++    def test_process_truncated_response_gzip(self):
++        resp = self._get_truncated_response("gzip")
++        assert resp.body.startswith(b"<!DOCTYPE")
++
++    def test_process_truncated_response_zstd(self):
++        _skip_if_no_zstd()
++        for check_key in FORMAT:
++            if not check_key.startswith("zstd-"):
++                continue
++            resp = self._get_truncated_response(check_key)
++            assert len(resp.body) == 0
+Index: scrapy-2.13.3/tests/test_proxy_connect.py
+===================================================================
+--- scrapy-2.13.3.orig/tests/test_proxy_connect.py
++++ scrapy-2.13.3/tests/test_proxy_connect.py
+@@ -62,6 +62,7 @@ def _wrong_credentials(proxy_url):
+     return urlunsplit(bad_auth_proxy)
+ 
+ 
++@pytest.mark.requires_mitmproxy
+ class TestProxyConnect(TestCase):
+     @classmethod
+     def setUpClass(cls):
+@@ -73,13 +74,7 @@ class TestProxyConnect(TestCase):
+         cls.mockserver.__exit__(None, None, None)
+ 
+     def setUp(self):
+-        try:
+-            import mitmproxy  # noqa: F401
+-        except ImportError:
+-            pytest.skip("mitmproxy is not installed")
+-
+         self._oldenv = os.environ.copy()
+-
+         self._proxy = MitmProxy()
+         proxy_url = self._proxy.start()
+         os.environ["https_proxy"] = proxy_url
 Index: scrapy-2.13.3/tox.ini
 ===================================================================
 --- scrapy-2.13.3.orig/tox.ini
 +++ scrapy-2.13.3/tox.ini
-@@ -141,8 +141,7 @@ deps =
+@@ -112,9 +112,6 @@ deps =
+     w3lib==1.17.0
+     zope.interface==5.1.0
+     {[test-requirements]deps}
+-
+-    # mitmproxy 8.0.0 requires upgrading some of the pinned dependencies
+-    # above, hence we do not install it in pinned environments at the moment
+ setenv =
+     _SCRAPY_PINNED=true
+ install_command =
+@@ -141,8 +138,7 @@ deps =
      Twisted[http2]
      boto3
      bpython  # optional for shell wrapper tests
@@ -671,7 +572,7 @@ Index: scrapy-2.13.3/tox.ini
      google-cloud-storage
      ipython
      robotexclusionrulesparser
-@@ -156,9 +155,7 @@ deps =
+@@ -156,9 +152,7 @@ deps =
      Pillow==8.0.0
      boto3==1.20.0
      bpython==0.7.1
@@ -682,3 +583,31 @@ Index: scrapy-2.13.3/tox.ini
      google-cloud-storage==1.29.0
      ipython==2.0.0
      robotexclusionrulesparser==1.6.2
+@@ -258,7 +252,7 @@ deps =
+     {[testenv]deps}
+     botocore>=1.4.87
+ commands =
+-    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy -m requires_botocore}
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_botocore
+ 
+ [testenv:botocore-pinned]
+ basepython = {[pinned]basepython}
+@@ -269,4 +263,17 @@ install_command = {[pinned]install_comma
+ setenv =
+     {[pinned]setenv}
+ commands =
+-    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy -m requires_botocore}
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy} -m requires_botocore
++
++
++# Run proxy tests that use mitmproxy in a separate env to avoid installing
++# numerous mitmproxy deps in other envs (even in extra-deps), as they can
++# conflict with other deps we want, or don't want, to have installed there.
++[testenv:mitmproxy]
++deps =
++    {[testenv]deps}
++    # mitmproxy does not support PyPy
++    mitmproxy; implementation_name != "pypy"
++commands =
++    pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_mitmproxy
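The requires_mitmproxy marker registered in pyproject.toml pairs with the autouse conftest fixture above, so marked tests skip themselves when mitmproxy is not importable, and the new [testenv:mitmproxy] tox env selects exactly those tests via -m requires_mitmproxy. A hypothetical test opting into the marker (illustrative only, not part of the patch):

    import pytest


    @pytest.mark.requires_mitmproxy  # auto-skipped when mitmproxy is absent
    def test_fetch_through_mitmproxy():
        # Hypothetical body; the real suite's TestProxyConnect (above) does this.
        import mitmproxy

        assert mitmproxy is not None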
python-Scrapy.changes:

@@ -1,3 +1,12 @@
+-------------------------------------------------------------------
+Mon Nov 17 10:58:13 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
+
+- Update CVE-2025-6176.patch to reflect the latest upstream changes
+  to the patch.
+- Remove the CVE-2025-6176-testfile-bomb-br-64GiB.bin source; it is
+  no longer needed.
+  (gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)
+
 -------------------------------------------------------------------
 Wed Nov 12 12:28:41 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>
 
python-Scrapy.spec:

@@ -38,9 +38,6 @@ Summary:        A high-level Python Screen Scraping framework
 License:        BSD-3-Clause
 URL:            https://scrapy.org
 Source:         https://files.pythonhosted.org/packages/source/s/scrapy/scrapy-%{version}.tar.gz
-# New test file added in the gh#scrapy/scrapy#7134, needed for Patch2
-# related to CVE-2025-6176
-Source1:        CVE-2025-6176-testfile-bomb-br-64GiB.bin
 # PATCH-FIX-UPSTREAM gh#scrapy/scrapy#6922
 Patch0:         remove-hoverxref.patch
 # PATCH-FIX-OPENSUSE No sphinx-rtd-dark-mode
@@ -54,7 +51,7 @@ BuildRequires:  %{python_module wheel}
 %if %{with test}
 # Test requirements:
 BuildRequires:  %{python_module Scrapy = %{version}}
-BuildRequires:  %{python_module Brotli}
+BuildRequires:  %{python_module Brotli >= 1.2.0}
 BuildRequires:  %{python_module Pillow}
 BuildRequires:  %{python_module Protego}
 BuildRequires:  %{python_module PyDispatcher >= 2.0.5}
@@ -146,8 +143,6 @@ popd
 
 %if %{with test}
 %check
-cp %{SOURCE1} tests/sample_data/compressed/bomb-br-64GiB.bin
-
 # no color in obs chroot console
 skiplist="test_pformat"
 # no online connection to toscrapy.com