forked from pool/python-Scrapy
Compare commits
12 Commits
911057633b
1c9c9bf4a9
36b4bfaf0b
dba2d7540a
406916eda8
0d3dbc2801
a7924cbd08
f2ecbd1d47
d580aa635a
f56239b802
91941765d1
eaffe32d22
.gitattributes (vendored): 2 changed lines
@@ -21,3 +21,5 @@
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
## Specific LFS patterns
CVE-2025-6176-testfile-bomb-br-64GiB.bin filter=lfs diff=lfs merge=lfs -text
CVE-2025-6176-testfile-bomb-br-64GiB.bin (new file): 3 lines
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d5b6139298c899595f784cdd36ff195dbdd479504c4a48d2a8e0a43d2e7a03d
size 51713
CVE-2025-6176.patch (new file): 684 lines
@@ -0,0 +1,684 @@
From e3673d5a42cdd8be95c09982240317af1410fea3 Mon Sep 17 00:00:00 2001
From: Rui Xi <Cycloctane@outlook.com>
Date: Thu, 6 Nov 2025 18:53:35 +0800
Subject: [PATCH 01/18] mitigate brotli decompression bomb

drop brotlicffi
---
 .../downloadermiddlewares/httpcompression.py | 10 +--
 scrapy/utils/_compression.py | 75 +++++--------------
 scrapy/utils/gz.py | 9 +--
 ...st_downloadermiddleware_httpcompression.py | 16 +---
 4 files changed, 29 insertions(+), 81 deletions(-)

Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/downloadermiddlewares/httpcompression.py
+++ scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
@@ -29,14 +29,20 @@ logger = getLogger(__name__)
ACCEPTED_ENCODINGS: list[bytes] = [b"gzip", b"deflate"]

try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
+ import brotli
except ImportError:
pass
else:
- ACCEPTED_ENCODINGS.append(b"br")
+ try:
+ brotli.Decompressor.can_accept_more_data
+ except AttributeError:
+ warnings.warn(
+ "You have brotli installed. But 'br' encoding support now requires "
+ "brotli version >= 1.2.0. Please upgrade brotli version to make Scrapy "
+ "decode 'br' encoded responses.",
+ )
+ else:
+ ACCEPTED_ENCODINGS.append(b"br")

try:
import zstandard # noqa: F401
@@ -98,13 +104,13 @@ class HttpCompressionMiddleware:
decoded_body, content_encoding = self._handle_encoding(
response.body, content_encoding, max_size
)
- except _DecompressionMaxSizeExceeded:
+ except _DecompressionMaxSizeExceeded as e:
raise IgnoreRequest(
f"Ignored response {response} because its body "
- f"({len(response.body)} B compressed) exceeded "
- f"DOWNLOAD_MAXSIZE ({max_size} B) during "
- f"decompression."
- )
+ f"({len(response.body)} B compressed, "
+ f"{e.decompressed_size} B decompressed so far) exceeded "
+ f"DOWNLOAD_MAXSIZE ({max_size} B) during decompression."
+ ) from e
if len(response.body) < warn_size <= len(decoded_body):
logger.warning(
f"{response} body size after decompression "
@@ -187,7 +193,7 @@ class HttpCompressionMiddleware:
f"from unsupported encoding(s) '{encodings_str}'."
)
if b"br" in encodings:
- msg += " You need to install brotli or brotlicffi to decode 'br'."
+ msg += " You need to install brotli >= 1.2.0 to decode 'br'."
if b"zstd" in encodings:
msg += " You need to install zstandard to decode 'zstd'."
logger.warning(msg)
Index: scrapy-2.13.3/scrapy/utils/_compression.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/utils/_compression.py
+++ scrapy-2.13.3/scrapy/utils/_compression.py
@@ -1,42 +1,9 @@
import contextlib
import zlib
from io import BytesIO
-from warnings import warn
-
-from scrapy.exceptions import ScrapyDeprecationWarning
-
-try:
- try:
- import brotli
- except ImportError:
- import brotlicffi as brotli
-except ImportError:
- pass
-else:
- try:
- brotli.Decompressor.process
- except AttributeError:
- warn(
- (
- "You have brotlipy installed, and Scrapy will use it, but "
- "Scrapy support for brotlipy is deprecated and will stop "
- "working in a future version of Scrapy. brotlipy itself is "
- "deprecated, it has been superseded by brotlicffi. Please, "
- "uninstall brotlipy and install brotli or brotlicffi instead. "
- "brotlipy has the same import name as brotli, so keeping both "
- "installed is strongly discouraged."
- ),
- ScrapyDeprecationWarning,
- )
-
- def _brotli_decompress(decompressor, data):
- return decompressor.decompress(data)
-
- else:
-
- def _brotli_decompress(decompressor, data):
- return decompressor.process(data)

+with contextlib.suppress(ImportError):
+ import brotli

with contextlib.suppress(ImportError):
import zstandard
@@ -46,62 +13,64 @@ _CHUNK_SIZE = 65536  # 64 KiB


class _DecompressionMaxSizeExceeded(ValueError):
- pass
+ def __init__(self, decompressed_size: int, max_size: int) -> None:
+ self.decompressed_size = decompressed_size
+ self.max_size = max_size
+
+ def __str__(self) -> str:
+ return (
+ "The number of bytes decompressed so far "
+ f"({self.decompressed_size} B) exceeded the specified maximum "
+ f"({self.max_size} B)."
+ )
+
+
+def _check_max_size(decompressed_size: int, max_size: int) -> None:
+ if max_size and decompressed_size > max_size:
+ raise _DecompressionMaxSizeExceeded(decompressed_size, max_size)


def _inflate(data: bytes, *, max_size: int = 0) -> bytes:
decompressor = zlib.decompressobj()
- raw_decompressor = zlib.decompressobj(wbits=-15)
- input_stream = BytesIO(data)
+ try:
+ first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
+ except zlib.error:
+ # to work with raw deflate content that may sent by microsoft servers.
+ decompressor = zlib.decompressobj(wbits=-15)
+ first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
+ decompressed_size = len(first_chunk)
+ _check_max_size(decompressed_size, max_size)
output_stream = BytesIO()
- output_chunk = b"."
- decompressed_size = 0
- while output_chunk:
- input_chunk = input_stream.read(_CHUNK_SIZE)
- try:
- output_chunk = decompressor.decompress(input_chunk)
- except zlib.error:
- if decompressor != raw_decompressor:
- # ugly hack to work with raw deflate content that may
- # be sent by microsoft servers. For more information, see:
- # http://carsten.codimi.de/gzip.yaws/
- # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
- # http://www.gzip.org/zlib/zlib_faq.html#faq38
- decompressor = raw_decompressor
- output_chunk = decompressor.decompress(input_chunk)
- else:
- raise
+ output_stream.write(first_chunk)
+ while decompressor.unconsumed_tail:
+ output_chunk = decompressor.decompress(
+ decompressor.unconsumed_tail, max_length=_CHUNK_SIZE
+ )
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ if tail := decompressor.flush():
+ decompressed_size += len(tail)
+ _check_max_size(decompressed_size, max_size)
+ output_stream.write(tail)
+ return output_stream.getvalue()


def _unbrotli(data: bytes, *, max_size: int = 0) -> bytes:
decompressor = brotli.Decompressor()
- input_stream = BytesIO(data)
+ first_chunk = decompressor.process(data, output_buffer_limit=_CHUNK_SIZE)
+ decompressed_size = len(first_chunk)
+ _check_max_size(decompressed_size, max_size)
output_stream = BytesIO()
- output_chunk = b"."
- decompressed_size = 0
- while output_chunk:
- input_chunk = input_stream.read(_CHUNK_SIZE)
- output_chunk = _brotli_decompress(decompressor, input_chunk)
+ output_stream.write(first_chunk)
+ while not decompressor.is_finished():
+ output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
+ if not output_chunk:
+ raise ValueError("Truncated brotli compressed data")
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()


def _unzstd(data: bytes, *, max_size: int = 0) -> bytes:
@@ -113,12 +82,6 @@ def _unzstd(data: bytes, *, max_size: in
while output_chunk:
output_chunk = stream_reader.read(_CHUNK_SIZE)
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()
Index: scrapy-2.13.3/scrapy/utils/gz.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/utils/gz.py
+++ scrapy-2.13.3/scrapy/utils/gz.py
@@ -5,7 +5,7 @@ from gzip import GzipFile
from io import BytesIO
from typing import TYPE_CHECKING

-from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded
+from ._compression import _CHUNK_SIZE, _check_max_size

if TYPE_CHECKING:
from scrapy.http import Response
@@ -31,15 +31,9 @@ def gunzip(data: bytes, *, max_size: int
break
raise
decompressed_size += len(chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()


def gzip_magic_number(response: Response) -> bool:
Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
===================================================================
--- scrapy-2.13.3.orig/tests/test_downloadermiddleware_httpcompression.py
+++ scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
@@ -2,7 +2,6 @@ from gzip import GzipFile
from io import BytesIO
from logging import WARNING
from pathlib import Path
-from unittest import SkipTest

import pytest
from testfixtures import LogCapture
@@ -48,9 +47,26 @@ FORMAT = {
"zstd", # 1 096 → 11 511 612
)
},
+ "bomb-br-64GiB": ("bomb-br-64GiB.bin", "br"), # 51K → 64 GiB decompression bomb
}


+def _skip_if_no_br() -> None:
+ try:
+ import brotli # noqa: PLC0415
+
+ brotli.Decompressor.can_accept_more_data
+ except (ImportError, AttributeError):
+ pytest.skip("no brotli support")
+
+
+def _skip_if_no_zstd() -> None:
+ try:
+ import zstandard # noqa: F401,PLC0415
+ except ImportError:
+ pytest.skip("no zstd support (zstandard)")
+
+
class TestHttpCompression:
def setup_method(self):
self.crawler = get_crawler(Spider)
@@ -124,13 +140,8 @@ class TestHttpCompression:
self.assertStatsEqual("httpcompression/response_bytes", 74837)

def test_process_response_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
response = self._getresponse("br")
request = response.request
assert response.headers["Content-Encoding"] == b"br"
@@ -143,14 +154,9 @@ class TestHttpCompression:

def test_process_response_br_unsupported(self):
try:
- try:
- import brotli # noqa: F401
-
- raise SkipTest("Requires not having brotli support")
- except ImportError:
- import brotlicffi # noqa: F401
+ import brotli # noqa: F401,PLC0415

- raise SkipTest("Requires not having brotli support")
+ pytest.skip("Requires not having brotli support")
except ImportError:
pass
response = self._getresponse("br")
@@ -169,7 +175,7 @@ class TestHttpCompression:
(
"HttpCompressionMiddleware cannot decode the response for"
" http://scrapytest.org/ from unsupported encoding(s) 'br'."
- " You need to install brotli or brotlicffi to decode 'br'."
+ " You need to install brotli >= 1.2.0 to decode 'br'."
),
),
)
@@ -177,10 +183,8 @@ class TestHttpCompression:
assert newresponse.headers.getlist("Content-Encoding") == [b"br"]

def test_process_response_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
raw_content = None
for check_key in FORMAT:
if not check_key.startswith("zstd-"):
@@ -199,9 +203,9 @@ class TestHttpCompression:

def test_process_response_zstd_unsupported(self):
try:
- import zstandard # noqa: F401
+ import zstandard # noqa: F401,PLC0415

- raise SkipTest("Requires not having zstandard support")
+ pytest.skip("Requires not having zstandard support")
except ImportError:
pass
response = self._getresponse("zstd-static-content-size")
@@ -503,24 +507,20 @@ class TestHttpCompression:
self.assertStatsEqual("httpcompression/response_bytes", None)

def _test_compression_bomb_setting(self, compression_id):
- settings = {"DOWNLOAD_MAXSIZE": 10_000_000}
+ settings = {"DOWNLOAD_MAXSIZE": 1_000_000}
crawler = get_crawler(Spider, settings_dict=settings)
spider = crawler._create_spider("scrapytest.org")
mw = HttpCompressionMiddleware.from_crawler(crawler)
mw.open_spider(spider)

- response = self._getresponse(f"bomb-{compression_id}")
- with pytest.raises(IgnoreRequest):
- mw.process_response(response.request, response, spider)
+ response = self._getresponse(f"bomb-{compression_id}") # 11_511_612 B
+ with pytest.raises(IgnoreRequest) as exc_info:
+ mw.process_response(response.request, response, self.spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000

def test_compression_bomb_setting_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_compression_bomb_setting("br")

def test_compression_bomb_setting_deflate(self):
@@ -530,15 +530,13 @@ class TestHttpCompression:
self._test_compression_bomb_setting("gzip")

def test_compression_bomb_setting_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_compression_bomb_setting("zstd")

def _test_compression_bomb_spider_attr(self, compression_id):
class DownloadMaxSizeSpider(Spider):
- download_maxsize = 10_000_000
+ download_maxsize = 1_000_000

crawler = get_crawler(DownloadMaxSizeSpider)
spider = crawler._create_spider("scrapytest.org")
@@ -546,30 +544,28 @@ class TestHttpCompression:
mw.open_spider(spider)

response = self._getresponse(f"bomb-{compression_id}")
- with pytest.raises(IgnoreRequest):
- mw.process_response(response.request, response, spider)
+ with pytest.raises(IgnoreRequest) as exc_info:
+ mw.process_response(response.request, response, self.spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_compression_bomb_spider_attr_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_compression_bomb_spider_attr("br")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_compression_bomb_spider_attr_deflate(self):
self._test_compression_bomb_spider_attr("deflate")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_compression_bomb_spider_attr_gzip(self):
self._test_compression_bomb_spider_attr("gzip")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_compression_bomb_spider_attr_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_compression_bomb_spider_attr("zstd")

def _test_compression_bomb_request_meta(self, compression_id):
@@ -579,18 +575,14 @@ class TestHttpCompression:
mw.open_spider(spider)

response = self._getresponse(f"bomb-{compression_id}")
- response.meta["download_maxsize"] = 10_000_000
- with pytest.raises(IgnoreRequest):
- mw.process_response(response.request, response, spider)
+ response.meta["download_maxsize"] = 1_000_000
+ with pytest.raises(IgnoreRequest) as exc_info:
+ mw.process_response(response.request, response, self.spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000

def test_compression_bomb_request_meta_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_compression_bomb_request_meta("br")

def test_compression_bomb_request_meta_deflate(self):
@@ -600,12 +592,38 @@ class TestHttpCompression:
self._test_compression_bomb_request_meta("gzip")

def test_compression_bomb_request_meta_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_compression_bomb_request_meta("zstd")

+ def test_compression_bomb_output_buffer_limit(self):
+ """Test that the 64 GiB brotli decompression bomb is properly handled.
+
+ This test ensures that the output_buffer_limit parameter in the brotli
+ decompressor prevents the decompression bomb attack. The bomb file is
+ approximately 51 KB compressed but would decompress to 64 GiB, which
+ should trigger IgnoreRequest when DOWNLOAD_MAXSIZE is exceeded.
+ """
+ _skip_if_no_br()
+
+ settings = {"DOWNLOAD_MAXSIZE": 10_000_000} # 10 MB limit
+ crawler = get_crawler(Spider, settings_dict=settings)
+ spider = crawler._create_spider("scrapytest.org")
+ mw = HttpCompressionMiddleware.from_crawler(crawler)
+ mw.open_spider(spider)
+
+ response = self._getresponse("bomb-br-64GiB")
+
+ # Verify the response is properly configured
+ assert response.headers["Content-Encoding"] == b"br"
+
+ # The middleware should raise IgnoreRequest due to exceeding DOWNLOAD_MAXSIZE
+ with pytest.raises(IgnoreRequest) as exc_info:
+ mw.process_response(response.request, response, self.spider)
+
+ # Verify the exception message mentions the download size limits
+ assert "exceeded DOWNLOAD_MAXSIZE (10000000 B)" in str(exc_info.value)
+
def _test_download_warnsize_setting(self, compression_id):
settings = {"DOWNLOAD_WARNSIZE": 10_000_000}
crawler = get_crawler(Spider, settings_dict=settings)
@@ -619,7 +637,7 @@ class TestHttpCompression:
propagate=False,
level=WARNING,
) as log:
- mw.process_response(response.request, response, spider)
+ mw.process_response(response.request, response, self.spider)
log.check(
(
"scrapy.downloadermiddlewares.httpcompression",
@@ -633,13 +651,8 @@ class TestHttpCompression:
)

def test_download_warnsize_setting_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_download_warnsize_setting("br")

def test_download_warnsize_setting_deflate(self):
@@ -649,10 +662,8 @@ class TestHttpCompression:
self._test_download_warnsize_setting("gzip")

def test_download_warnsize_setting_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_download_warnsize_setting("zstd")

def _test_download_warnsize_spider_attr(self, compression_id):
@@ -670,7 +681,7 @@ class TestHttpCompression:
propagate=False,
level=WARNING,
) as log:
- mw.process_response(response.request, response, spider)
+ mw.process_response(response.request, response, self.spider)
log.check(
(
"scrapy.downloadermiddlewares.httpcompression",
@@ -683,27 +694,24 @@ class TestHttpCompression:
),
)

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_download_warnsize_spider_attr_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_download_warnsize_spider_attr("br")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_download_warnsize_spider_attr_deflate(self):
self._test_download_warnsize_spider_attr("deflate")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_download_warnsize_spider_attr_gzip(self):
self._test_download_warnsize_spider_attr("gzip")

+ @pytest.mark.filterwarnings("ignore::scrapy.exceptions.ScrapyDeprecationWarning")
def test_download_warnsize_spider_attr_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_download_warnsize_spider_attr("zstd")

def _test_download_warnsize_request_meta(self, compression_id):
@@ -719,7 +727,7 @@ class TestHttpCompression:
propagate=False,
level=WARNING,
) as log:
- mw.process_response(response.request, response, spider)
+ mw.process_response(response.request, response, self.spider)
log.check(
(
"scrapy.downloadermiddlewares.httpcompression",
@@ -733,13 +741,8 @@ class TestHttpCompression:
)

def test_download_warnsize_request_meta_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
+
self._test_download_warnsize_request_meta("br")

def test_download_warnsize_request_meta_deflate(self):
@@ -749,8 +752,6 @@ class TestHttpCompression:
self._test_download_warnsize_request_meta("gzip")

def test_download_warnsize_request_meta_zstd(self):
- try:
- import zstandard # noqa: F401
- except ImportError:
- raise SkipTest("no zstd support (zstandard)")
+ _skip_if_no_zstd()
+
self._test_download_warnsize_request_meta("zstd")
Index: scrapy-2.13.3/tox.ini
===================================================================
--- scrapy-2.13.3.orig/tox.ini
+++ scrapy-2.13.3/tox.ini
@@ -141,8 +141,7 @@ deps =
Twisted[http2]
boto3
bpython # optional for shell wrapper tests
- brotli; implementation_name != "pypy" # optional for HTTP compress downloader middleware tests
- brotlicffi; implementation_name == "pypy" # optional for HTTP compress downloader middleware tests
+ brotli >= 1.2.0 # optional for HTTP compress downloader middleware tests
google-cloud-storage
ipython
robotexclusionrulesparser
@@ -156,9 +155,7 @@ deps =
Pillow==8.0.0
boto3==1.20.0
bpython==0.7.1
- brotli==0.5.2; implementation_name != "pypy"
- brotlicffi==0.8.0; implementation_name == "pypy"
- brotlipy
+ brotli==1.2.0
google-cloud-storage==1.29.0
ipython==2.0.0
robotexclusionrulesparser==1.6.2

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:733a039c7423e52b69bf2810b5332093d4e42a848460359c07b02ecff8f73ebe
size 1176726
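For context: every codec path in the patch above follows the same size-capped, chunked decompression pattern, decompressing at most _CHUNK_SIZE bytes per step and checking the running total against the configured maximum after each chunk, so a small compressed body can never balloon unchecked in memory. Below is a minimal standalone sketch of that pattern for the deflate path; the function and exception names are illustrative, not Scrapy's, and the brotli path works the same way but additionally needs brotli >= 1.2.0 for Decompressor.process(..., output_buffer_limit=...).

import zlib
from io import BytesIO

_CHUNK_SIZE = 65536  # 64 KiB per decompression step, as in the patch


class DecompressionMaxSizeExceeded(ValueError):
    """Raised as soon as the decompressed output exceeds the allowed maximum."""


def _check_max_size(decompressed_size: int, max_size: int) -> None:
    if max_size and decompressed_size > max_size:
        raise DecompressionMaxSizeExceeded(
            f"{decompressed_size} B decompressed so far, limit is {max_size} B"
        )


def bounded_inflate(data: bytes, *, max_size: int = 0) -> bytes:
    """Inflate zlib-compressed data without ever holding more than max_size bytes."""
    decompressor = zlib.decompressobj()
    chunk = decompressor.decompress(data, _CHUNK_SIZE)
    decompressed_size = len(chunk)
    _check_max_size(decompressed_size, max_size)
    output = BytesIO()
    output.write(chunk)
    # unconsumed_tail holds input withheld because of the per-step output limit
    while decompressor.unconsumed_tail:
        chunk = decompressor.decompress(decompressor.unconsumed_tail, _CHUNK_SIZE)
        decompressed_size += len(chunk)
        _check_max_size(decompressed_size, max_size)
        output.write(chunk)
    if tail := decompressor.flush():
        decompressed_size += len(tail)
        _check_max_size(decompressed_size, max_size)
        output.write(tail)
    return output.getvalue()


# A 10 MB body that compresses to a few KB is rejected shortly after 1 MB of
# output instead of being fully materialised:
bomb = zlib.compress(b"\0" * 10_000_000)
try:
    bounded_inflate(bomb, max_size=1_000_000)
except DecompressionMaxSizeExceeded as exc:
    print(exc)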
_multibuild (new file): 3 lines
@@ -0,0 +1,3 @@
<multibuild>
<package>test</package>
</multibuild>
no-dark-mode.patch (new file): 18 lines
@@ -0,0 +1,18 @@
Index: scrapy-2.13.3/docs/conf.py
===================================================================
--- scrapy-2.13.3.orig/docs/conf.py
+++ scrapy-2.13.3/docs/conf.py
@@ -34,7 +34,7 @@ extensions = [
"sphinx.ext.coverage",
"sphinx.ext.intersphinx",
"sphinx.ext.viewcode",
- "sphinx_rtd_dark_mode",
+ "sphinx_rtd_theme",
]

templates_path = ["_templates"]
@@ -158,4 +158,3 @@ intersphinx_mapping = {
intersphinx_disabled_reftypes: Sequence[str] = []

# -- Other options ------------------------------------------------------------
-default_dark_mode = False
@@ -1,3 +1,119 @@
-------------------------------------------------------------------
Wed Nov 12 12:28:41 UTC 2025 - Daniel Garcia <daniel.garcia@suse.com>

- Use libalternatives
- Use multibuild to run tests in a subpackage
- add upstream patch CVE-2025-6176.patch to mitigate brotli and
deflate decompression bombs DoS.
This patch adds a new bin test file that was added as a new source
as CVE-2025-6176-testfile-bomb-br-64GiB.bin
gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)

-------------------------------------------------------------------
Thu Jul 31 05:18:40 UTC 2025 - Steve Kowalik <steven.kowalik@suse.com>

- Update to 2.13.3:
* Changed the values for DOWNLOAD_DELAY (from 0 to 1) and
CONCURRENT_REQUESTS_PER_DOMAIN (from 8 to 1) in the default project
template.
* Fixed several bugs in the engine initialization and exception handling
logic.
* Allowed running tests with Twisted 25.5.0+ again and fixed test failures
with lxml 6.0.0.
* Give callback requests precedence over start requests when priority
values are the same.
* The asyncio reactor is now enabled by default
* Replaced start_requests() (sync) with start() (async) and changed how it
is iterated.
* Added the allow_offsite request meta key
* Spider middlewares that don't support asynchronous spider output are
deprecated
* Added a base class for universal spider middlewares
- Add patch remove-hoverxref.patch:
* Do not use deprecated sphinx-hoverxref extension.
- Add patch no-dark-mode.patch:
* Do not use unavailable sphinx-rtd-dark-mode extension.

-------------------------------------------------------------------
Thu Mar 27 05:45:59 UTC 2025 - Steve Kowalik <steven.kowalik@suse.com>

- Normalize metadata directory name.

-------------------------------------------------------------------
Tue Dec 3 08:24:29 UTC 2024 - Steve Kowalik <steven.kowalik@suse.com>

- Update to 2.12.0:
* Dropped support for Python 3.8, added support for Python 3.13
* start_requests can now yield items
* Added scrapy.http.JsonResponse
* Added the CLOSESPIDER_PAGECOUNT_NO_ITEM setting

-------------------------------------------------------------------
Thu Jul 11 10:38:36 UTC 2024 - Dirk Müller <dmueller@suse.com>

- update to 2.11.2 (bsc#1224474, CVE-2024-1968):
* Redirects to non-HTTP protocols are no longer followed.
Please, see the 23j4-mw76-5v7h security advisory for more
information. (:issue:`457`)
* The Authorization header is now dropped on redirects to a
different scheme (http:// or https://) or port, even if the
domain is the same. Please, see the 4qqq-9vqf-3h3f security
advisory for more information.
* When using system proxy settings that are different for
http:// and https://, redirects to a different URL scheme
will now also trigger the corresponding change in proxy
settings for the redirected request. Please, see the
jm3v-qxmh-hxwv security advisory for more information.
(:issue:`767`)
* :attr:`Spider.allowed_domains
<scrapy.Spider.allowed_domains>` is now enforced for all
requests, and not only requests from spider callbacks.
* :func:`~scrapy.utils.iterators.xmliter_lxml` no longer
resolves XML entities.
* defusedxml is now used to make
:class:`scrapy.http.request.rpc.XmlRpcRequest` more secure.
* Restored support for brotlipy_, which had been dropped in
Scrapy 2.11.1 in favor of brotli. (:issue:`6261`) Note
brotlipy is deprecated, both in Scrapy and upstream. Use
brotli instead if you can.
* Make :setting:`METAREFRESH_IGNORE_TAGS` ["noscript"] by
default. This prevents :class:`~scrapy.downloadermiddlewares.
redirect.MetaRefreshMiddleware` from following redirects that
would not be followed by web browsers with JavaScript
enabled.
* During :ref:`feed export <topics-feed-exports>`, do not close
the underlying file from :ref:`built-in post-processing
plugins <builtin-plugins>`.
* :class:`LinkExtractor
<scrapy.linkextractors.lxmlhtml.LxmlLinkExtractor>` now
properly applies the unique and canonicalize parameters.
* Do not initialize the scheduler disk queue if
:setting:`JOBDIR` is an empty string.
* Fix :attr:`Spider.logger <scrapy.Spider.logger>` not logging
custom extra information.
* robots.txt files with a non-UTF-8 encoding no longer prevent
parsing the UTF-8-compatible (e.g. ASCII) parts of the
document.
* :meth:`scrapy.http.cookies.WrappedRequest.get_header` no
longer raises an exception if default is None.
* :class:`~scrapy.selector.Selector` now uses
:func:`scrapy.utils.response.get_base_url` to determine the
base URL of a given :class:`~scrapy.http.Response`.
(:issue:`6265`)
* The :meth:`media_to_download` method of :ref:`media pipelines
<topics-media-pipeline>` now logs exceptions before stripping
them.
* When passing a callback to the :command:`parse` command,
build the callback callable with the right signature.
* Add a FAQ entry about :ref:`creating blank requests <faq-
blank-request>`.
* Document that :attr:`scrapy.selector.Selector.type` can be
"json".
* Make builds reproducible.
* Packaging and test fixes

-------------------------------------------------------------------
Mon Mar 25 14:12:20 UTC 2024 - Dirk Müller <dmueller@suse.com>

@@ -1,7 +1,7 @@
#
# spec file for package python-Scrapy
#
# Copyright (c) 2024 SUSE LLC
# Copyright (c) 2025 SUSE LLC and contributors
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -16,53 +16,84 @@
#


%global flavor @BUILD_FLAVOR@%{nil}
%if "%{flavor}" == "test"
%define psuffix -test
%bcond_without test
%endif
%if "%{flavor}" == ""
%define psuffix %{nil}
%bcond_with test
%endif
%if 0%{?suse_version} > 1500
%bcond_without libalternatives
%else
%bcond_with libalternatives
%endif
%{?sle15_python_module_pythons}
Name: python-Scrapy
Version: 2.11.1
Name: python-Scrapy%{?psuffix}
Version: 2.13.3
Release: 0
Summary: A high-level Python Screen Scraping framework
License: BSD-3-Clause
Group: Development/Languages/Python
URL: https://scrapy.org
Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz
Source: https://files.pythonhosted.org/packages/source/s/scrapy/scrapy-%{version}.tar.gz
# New test file added in the gh#scrapy/scrapy#7134, needed for Patch2
# related to CVE-2025-6176
Source1: CVE-2025-6176-testfile-bomb-br-64GiB.bin
# PATCH-FIX-UPSTREAM gh#scrapy/scrapy#6922
Patch0: remove-hoverxref.patch
# PATCH-FIX-OPENSUSE No sphinx-rtd-dark-mode
Patch1: no-dark-mode.patch
# PATCH-FIX-UPSTREAM CVE-2025-6176.patch gh#scrapy/scrapy#7134
Patch2: CVE-2025-6176.patch
BuildRequires: %{python_module base >= 3.9}
BuildRequires: %{python_module hatchling}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module wheel}
%if %{with test}
# Test requirements:
BuildRequires: %{python_module Scrapy = %{version}}
BuildRequires: %{python_module Brotli}
BuildRequires: %{python_module Pillow}
BuildRequires: %{python_module Protego >= 0.1.15}
BuildRequires: %{python_module Protego}
BuildRequires: %{python_module PyDispatcher >= 2.0.5}
BuildRequires: %{python_module Twisted >= 18.9.0}
BuildRequires: %{python_module attrs}
BuildRequires: %{python_module base >= 3.8}
BuildRequires: %{python_module botocore >= 1.4.87}
BuildRequires: %{python_module cryptography >= 36.0.0}
BuildRequires: %{python_module cssselect >= 0.9.1}
BuildRequires: %{python_module dbm}
BuildRequires: %{python_module defusedxml >= 0.7.1}
BuildRequires: %{python_module itemadapter >= 0.1.0}
BuildRequires: %{python_module itemloaders >= 1.0.1}
BuildRequires: %{python_module lxml >= 4.4.1}
BuildRequires: %{python_module parsel >= 1.5.0}
BuildRequires: %{python_module pexpect >= 4.8.1}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module pyOpenSSL >= 21.0.0}
BuildRequires: %{python_module pyftpdlib >= 1.5.8}
BuildRequires: %{python_module pytest-xdist}
BuildRequires: %{python_module pytest}
BuildRequires: %{python_module queuelib >= 1.4.2}
BuildRequires: %{python_module service_identity >= 18.1.0}
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module sybil}
BuildRequires: %{python_module testfixtures}
BuildRequires: %{python_module tldextract}
BuildRequires: %{python_module uvloop}
BuildRequires: %{python_module w3lib >= 1.17.0}
BuildRequires: %{python_module wheel}
BuildRequires: %{python_module zope.interface >= 5.1.0}
%endif
BuildRequires: fdupes
BuildRequires: python-rpm-macros
BuildRequires: python3-Sphinx
BuildRequires: python3-sphinx-notfound-page
BuildRequires: python3-sphinx_rtd_theme
Requires: python-Protego >= 0.1.15
Requires: python-PyDispatcher >= 2.0.5
Requires: python-Twisted >= 18.9.0
Requires: python-cryptography >= 36.0.0
Requires: python-cssselect >= 0.9.1
Requires: python-defusedxml >= 0.7.1
Requires: python-itemadapter >= 0.1.0
Requires: python-itemloaders >= 1.0.1
Requires: python-lxml >= 4.4.1
@@ -71,13 +102,17 @@ Requires: python-parsel >= 1.5.0
Requires: python-pyOpenSSL >= 21.0.0
Requires: python-queuelib >= 1.4.2
Requires: python-service_identity >= 18.1.0
Requires: python-setuptools
Requires: python-tldextract
Requires: python-w3lib >= 1.17.2
Requires: python-zope.interface >= 5.1.0
BuildArch: noarch
%if %{with libalternatives}
BuildRequires: alts
Requires: alts
%else
Requires(post): update-alternatives
Requires(postun): update-alternatives
BuildArch: noarch
%endif
%python_subpackages

%description
@@ -87,16 +122,16 @@ retrieval to monitoring or testing web sites.

%package -n %{name}-doc
Summary: Documentation for %{name}
Group: Documentation/HTML

%description -n %{name}-doc
Provides documentation for %{name}.

%prep
%autosetup -p1 -n Scrapy-%{version}
%autosetup -p1 -n scrapy-%{version}

sed -i -e 's:= python:= python3:g' docs/Makefile

%if %{without test}
%build
%pyproject_wheel
pushd docs
@@ -107,18 +142,30 @@ popd
%pyproject_install
%python_clone -a %{buildroot}%{_bindir}/scrapy
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%endif

%if %{with test}
%check
cp %{SOURCE1} tests/sample_data/compressed/bomb-br-64GiB.bin

# no color in obs chroot console
skiplist="test_pformat"
# no online connection to toscrapy.com
skiplist="$skiplist or CheckCommandTest"
skiplist="$skiplist or CheckCommandTest or test_file_path"
# Flaky test gh#scrapy/scrapy#5703
skiplist="$skiplist or test_start_requests_laziness"
# Fails on 32 bit arches
skiplist="$skiplist or test_queue_push_pop_priorities"
%{pytest -x \
-k "not (${skiplist})" \
-W ignore::DeprecationWarning \
tests}
%endif

%if %{without test}
%pre
# If libalternatives is used: Removing old update-alternatives entries.
%python_libalternatives_reset_alternative scrapy

%post
%python_install_alternative scrapy
@@ -130,10 +177,11 @@ skiplist="$skiplist or test_start_requests_laziness"
%license LICENSE
%doc AUTHORS README.rst
%{python_sitelib}/scrapy
%{python_sitelib}/Scrapy-%{version}.dist-info
%{python_sitelib}/[Ss]crapy-%{version}.dist-info
%python_alternative %{_bindir}/scrapy

%files -n %{name}-doc
%doc docs/build/html
%endif

%changelog
remove-hoverxref.patch (new file): 56 lines
@@ -0,0 +1,56 @@
From 549730c23592479f200f3c1f941c59f68c510ff5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io>
Date: Sat, 28 Jun 2025 12:32:55 +0200
Subject: [PATCH] Remove the deprecated sphinx-hoverxref

---
 docs/conf.py | 20 +-------------------
 docs/requirements.txt | 1 -
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 493a6297624..0345ec69543 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,7 +26,6 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
- "hoverxref.extension",
"notfound.extension",
"scrapydocs",
"sphinx.ext.autodoc",
@@ -157,22 +156,5 @@
}
intersphinx_disabled_reftypes: Sequence[str] = []

-
-# -- Options for sphinx-hoverxref extension ----------------------------------
-# https://sphinx-hoverxref.readthedocs.io/en/latest/configuration.html
-
-hoverxref_auto_ref = True
-hoverxref_role_types = {
- "class": "tooltip",
- "command": "tooltip",
- "confval": "tooltip",
- "hoverxref": "tooltip",
- "mod": "tooltip",
- "ref": "tooltip",
- "reqmeta": "tooltip",
- "setting": "tooltip",
- "signal": "tooltip",
-}
-hoverxref_roles = ["command", "reqmeta", "setting", "signal"]
-
+# -- Other options ------------------------------------------------------------
default_dark_mode = False
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 103fb08d667..4b382b11eb9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,4 @@
sphinx==8.1.3
-sphinx-hoverxref==1.4.2
sphinx-notfound-page==1.0.4
sphinx-rtd-theme==3.0.2
sphinx-rtd-dark-mode==1.3.0
scrapy-2.13.3.tar.gz (new file): 3 lines
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf17588c10e46a9d70c49a05380b749e3c7fba58204a367a5747ce6da2bd204d
size 1220051