forked from pool/python-Scrapy

- Update CVE-2025-6176.patch to reflect the latest changes upstream to
  the patch.
- Remove the CVE-2025-6176-testfile-bomb-br-64GiB.bin source, it's not
  needed anymore.
  (gh#scrapy/scrapy#7134, bsc#1252945, CVE-2025-6176)

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-Scrapy?expand=0&rev=53
commit 56db2db8d1
committed by Git OBS Bridge on 2025-11-17 11:39:40 +00:00
10 changed files with 2723 additions and 0 deletions

.gitattributes vendored Normal file (25 lines)

@@ -0,0 +1,25 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
## Specific LFS patterns
CVE-2025-6176-testfile-bomb-br-64GiB.bin filter=lfs diff=lfs merge=lfs -text

.gitignore vendored Normal file (1 line)

@@ -0,0 +1 @@
.osc

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d5b6139298c899595f784cdd36ff195dbdd479504c4a48d2a8e0a43d2e7a03d
size 51713

CVE-2025-6176.patch Normal file (613 lines)

@@ -0,0 +1,613 @@
Index: scrapy-2.13.3/conftest.py
===================================================================
--- scrapy-2.13.3.orig/conftest.py
+++ scrapy-2.13.3/conftest.py
@@ -116,6 +116,16 @@ def requires_boto3(request):
pytest.skip("boto3 is not installed")
+@pytest.fixture(autouse=True)
+def requires_mitmproxy(request):
+ if not request.node.get_closest_marker("requires_mitmproxy"):
+ return
+ try:
+ import mitmproxy # noqa: F401, PLC0415
+ except ImportError:
+ pytest.skip("mitmproxy is not installed")
+
+
def pytest_configure(config):
if config.getoption("--reactor") != "default":
install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
Index: scrapy-2.13.3/pyproject.toml
===================================================================
--- scrapy-2.13.3.orig/pyproject.toml
+++ scrapy-2.13.3/pyproject.toml
@@ -242,6 +242,7 @@ markers = [
"requires_uvloop: marks tests as only enabled when uvloop is known to be working",
"requires_botocore: marks tests that need botocore (but not boto3)",
"requires_boto3: marks tests that need botocore and boto3",
+ "requires_mitmproxy: marks tests that need mitmproxy",
]
filterwarnings = [
"ignore::DeprecationWarning:twisted.web.static"
Index: scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/downloadermiddlewares/httpcompression.py
+++ scrapy-2.13.3/scrapy/downloadermiddlewares/httpcompression.py
@@ -29,14 +29,20 @@ logger = getLogger(__name__)
ACCEPTED_ENCODINGS: list[bytes] = [b"gzip", b"deflate"]
try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
+ import brotli
except ImportError:
pass
else:
- ACCEPTED_ENCODINGS.append(b"br")
+ try:
+ brotli.Decompressor.can_accept_more_data
+ except AttributeError: # pragma: no cover
+ warnings.warn(
+ "You have brotli installed. But 'br' encoding support now requires "
+ "brotli version >= 1.2.0. Please upgrade brotli version to make Scrapy "
+ "decode 'br' encoded responses.",
+ )
+ else:
+ ACCEPTED_ENCODINGS.append(b"br")
try:
import zstandard # noqa: F401
@@ -98,13 +104,13 @@ class HttpCompressionMiddleware:
decoded_body, content_encoding = self._handle_encoding(
response.body, content_encoding, max_size
)
- except _DecompressionMaxSizeExceeded:
+ except _DecompressionMaxSizeExceeded as e:
raise IgnoreRequest(
f"Ignored response {response} because its body "
- f"({len(response.body)} B compressed) exceeded "
- f"DOWNLOAD_MAXSIZE ({max_size} B) during "
- f"decompression."
- )
+ f"({len(response.body)} B compressed, "
+ f"{e.decompressed_size} B decompressed so far) exceeded "
+ f"DOWNLOAD_MAXSIZE ({max_size} B) during decompression."
+ ) from e
if len(response.body) < warn_size <= len(decoded_body):
logger.warning(
f"{response} body size after decompression "
@@ -187,7 +193,7 @@ class HttpCompressionMiddleware:
f"from unsupported encoding(s) '{encodings_str}'."
)
if b"br" in encodings:
- msg += " You need to install brotli or brotlicffi to decode 'br'."
+ msg += " You need to install brotli >= 1.2.0 to decode 'br'."
if b"zstd" in encodings:
msg += " You need to install zstandard to decode 'zstd'."
logger.warning(msg)
Index: scrapy-2.13.3/scrapy/utils/_compression.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/utils/_compression.py
+++ scrapy-2.13.3/scrapy/utils/_compression.py
@@ -1,42 +1,9 @@
import contextlib
import zlib
from io import BytesIO
-from warnings import warn
-
-from scrapy.exceptions import ScrapyDeprecationWarning
-
-try:
- try:
- import brotli
- except ImportError:
- import brotlicffi as brotli
-except ImportError:
- pass
-else:
- try:
- brotli.Decompressor.process
- except AttributeError:
- warn(
- (
- "You have brotlipy installed, and Scrapy will use it, but "
- "Scrapy support for brotlipy is deprecated and will stop "
- "working in a future version of Scrapy. brotlipy itself is "
- "deprecated, it has been superseded by brotlicffi. Please, "
- "uninstall brotlipy and install brotli or brotlicffi instead. "
- "brotlipy has the same import name as brotli, so keeping both "
- "installed is strongly discouraged."
- ),
- ScrapyDeprecationWarning,
- )
-
- def _brotli_decompress(decompressor, data):
- return decompressor.decompress(data)
-
- else:
-
- def _brotli_decompress(decompressor, data):
- return decompressor.process(data)
+with contextlib.suppress(ImportError):
+ import brotli
with contextlib.suppress(ImportError):
import zstandard
@@ -46,62 +13,64 @@ _CHUNK_SIZE = 65536 # 64 KiB
class _DecompressionMaxSizeExceeded(ValueError):
- pass
+ def __init__(self, decompressed_size: int, max_size: int) -> None:
+ self.decompressed_size = decompressed_size
+ self.max_size = max_size
+
+ def __str__(self) -> str:
+ return (
+ f"The number of bytes decompressed so far "
+ f"({self.decompressed_size} B) exceeded the specified maximum "
+ f"({self.max_size} B)."
+ )
+
+
+def _check_max_size(decompressed_size: int, max_size: int) -> None:
+ if max_size and decompressed_size > max_size:
+ raise _DecompressionMaxSizeExceeded(decompressed_size, max_size)
def _inflate(data: bytes, *, max_size: int = 0) -> bytes:
decompressor = zlib.decompressobj()
- raw_decompressor = zlib.decompressobj(wbits=-15)
- input_stream = BytesIO(data)
+ try:
+ first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
+ except zlib.error:
+ # to work with raw deflate content that may be sent by microsoft servers.
+ decompressor = zlib.decompressobj(wbits=-15)
+ first_chunk = decompressor.decompress(data, max_length=_CHUNK_SIZE)
+ decompressed_size = len(first_chunk)
+ _check_max_size(decompressed_size, max_size)
output_stream = BytesIO()
- output_chunk = b"."
- decompressed_size = 0
- while output_chunk:
- input_chunk = input_stream.read(_CHUNK_SIZE)
- try:
- output_chunk = decompressor.decompress(input_chunk)
- except zlib.error:
- if decompressor != raw_decompressor:
- # ugly hack to work with raw deflate content that may
- # be sent by microsoft servers. For more information, see:
- # http://carsten.codimi.de/gzip.yaws/
- # http://www.port80software.com/200ok/archive/2005/10/31/868.aspx
- # http://www.gzip.org/zlib/zlib_faq.html#faq38
- decompressor = raw_decompressor
- output_chunk = decompressor.decompress(input_chunk)
- else:
- raise
+ output_stream.write(first_chunk)
+ while decompressor.unconsumed_tail:
+ output_chunk = decompressor.decompress(
+ decompressor.unconsumed_tail, max_length=_CHUNK_SIZE
+ )
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ if tail := decompressor.flush():
+ decompressed_size += len(tail)
+ _check_max_size(decompressed_size, max_size)
+ output_stream.write(tail)
+ return output_stream.getvalue()
def _unbrotli(data: bytes, *, max_size: int = 0) -> bytes:
decompressor = brotli.Decompressor()
- input_stream = BytesIO(data)
+ first_chunk = decompressor.process(data, output_buffer_limit=_CHUNK_SIZE)
+ decompressed_size = len(first_chunk)
+ _check_max_size(decompressed_size, max_size)
output_stream = BytesIO()
- output_chunk = b"."
- decompressed_size = 0
- while output_chunk:
- input_chunk = input_stream.read(_CHUNK_SIZE)
- output_chunk = _brotli_decompress(decompressor, input_chunk)
+ output_stream.write(first_chunk)
+ while not decompressor.is_finished():
+ output_chunk = decompressor.process(b"", output_buffer_limit=_CHUNK_SIZE)
+ if not output_chunk:
+ break
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()
def _unzstd(data: bytes, *, max_size: int = 0) -> bytes:
@@ -113,12 +82,6 @@ def _unzstd(data: bytes, *, max_size: in
while output_chunk:
output_chunk = stream_reader.read(_CHUNK_SIZE)
decompressed_size += len(output_chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(output_chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()
Index: scrapy-2.13.3/scrapy/utils/gz.py
===================================================================
--- scrapy-2.13.3.orig/scrapy/utils/gz.py
+++ scrapy-2.13.3/scrapy/utils/gz.py
@@ -5,7 +5,7 @@ from gzip import GzipFile
from io import BytesIO
from typing import TYPE_CHECKING
-from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded
+from ._compression import _CHUNK_SIZE, _check_max_size
if TYPE_CHECKING:
from scrapy.http import Response
@@ -31,15 +31,9 @@ def gunzip(data: bytes, *, max_size: int
break
raise
decompressed_size += len(chunk)
- if max_size and decompressed_size > max_size:
- raise _DecompressionMaxSizeExceeded(
- f"The number of bytes decompressed so far "
- f"({decompressed_size} B) exceed the specified maximum "
- f"({max_size} B)."
- )
+ _check_max_size(decompressed_size, max_size)
output_stream.write(chunk)
- output_stream.seek(0)
- return output_stream.read()
+ return output_stream.getvalue()
def gzip_magic_number(response: Response) -> bool:
Index: scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
===================================================================
--- scrapy-2.13.3.orig/tests/test_downloadermiddleware_httpcompression.py
+++ scrapy-2.13.3/tests/test_downloadermiddleware_httpcompression.py
@@ -51,6 +51,22 @@ FORMAT = {
}
+def _skip_if_no_br() -> None:
+ try:
+ import brotli # noqa: PLC0415
+
+ brotli.Decompressor.can_accept_more_data
+ except (ImportError, AttributeError):
+ pytest.skip("no brotli support")
+
+
+def _skip_if_no_zstd() -> None:
+ try:
+ import zstandard # noqa: F401,PLC0415
+ except ImportError:
+ pytest.skip("no zstd support (zstandard)")
+
+
class TestHttpCompression:
def setup_method(self):
self.crawler = get_crawler(Spider)
@@ -124,13 +140,7 @@ class TestHttpCompression:
self.assertStatsEqual("httpcompression/response_bytes", 74837)
def test_process_response_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
response = self._getresponse("br")
request = response.request
assert response.headers["Content-Encoding"] == b"br"
@@ -143,14 +153,8 @@ class TestHttpCompression:
def test_process_response_br_unsupported(self):
try:
- try:
- import brotli # noqa: F401
-
- raise SkipTest("Requires not having brotli support")
- except ImportError:
- import brotlicffi # noqa: F401
-
- raise SkipTest("Requires not having brotli support")
+ import brotli # noqa: F401,PLC0415
+ pytest.skip("Requires not having brotli support")
except ImportError:
pass
response = self._getresponse("br")
@@ -169,7 +173,7 @@ class TestHttpCompression:
(
"HttpCompressionMiddleware cannot decode the response for"
" http://scrapytest.org/ from unsupported encoding(s) 'br'."
- " You need to install brotli or brotlicffi to decode 'br'."
+ " You need to install brotli >= 1.2.0 to decode 'br'."
),
),
)
@@ -503,24 +507,19 @@ class TestHttpCompression:
self.assertStatsEqual("httpcompression/response_bytes", None)
def _test_compression_bomb_setting(self, compression_id):
- settings = {"DOWNLOAD_MAXSIZE": 10_000_000}
+ settings = {"DOWNLOAD_MAXSIZE": 1_000_000}
crawler = get_crawler(Spider, settings_dict=settings)
spider = crawler._create_spider("scrapytest.org")
mw = HttpCompressionMiddleware.from_crawler(crawler)
mw.open_spider(spider)
- response = self._getresponse(f"bomb-{compression_id}")
- with pytest.raises(IgnoreRequest):
+ response = self._getresponse(f"bomb-{compression_id}") # 11_511_612 B
+ with pytest.raises(IgnoreRequest) as exc_info:
mw.process_response(response.request, response, spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000
def test_compression_bomb_setting_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_compression_bomb_setting("br")
def test_compression_bomb_setting_deflate(self):
@@ -538,7 +537,7 @@ class TestHttpCompression:
def _test_compression_bomb_spider_attr(self, compression_id):
class DownloadMaxSizeSpider(Spider):
- download_maxsize = 10_000_000
+ download_maxsize = 1_000_000
crawler = get_crawler(DownloadMaxSizeSpider)
spider = crawler._create_spider("scrapytest.org")
@@ -546,17 +545,12 @@ class TestHttpCompression:
mw.open_spider(spider)
response = self._getresponse(f"bomb-{compression_id}")
- with pytest.raises(IgnoreRequest):
+ with pytest.raises(IgnoreRequest) as exc_info:
mw.process_response(response.request, response, spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000
def test_compression_bomb_spider_attr_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_compression_bomb_spider_attr("br")
def test_compression_bomb_spider_attr_deflate(self):
@@ -579,18 +573,13 @@ class TestHttpCompression:
mw.open_spider(spider)
response = self._getresponse(f"bomb-{compression_id}")
- response.meta["download_maxsize"] = 10_000_000
- with pytest.raises(IgnoreRequest):
+ response.meta["download_maxsize"] = 1_000_000
+ with pytest.raises(IgnoreRequest) as exc_info:
mw.process_response(response.request, response, spider)
+ assert exc_info.value.__cause__.decompressed_size < 1_100_000
def test_compression_bomb_request_meta_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_compression_bomb_request_meta("br")
def test_compression_bomb_request_meta_deflate(self):
@@ -633,13 +622,7 @@ class TestHttpCompression:
)
def test_download_warnsize_setting_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_download_warnsize_setting("br")
def test_download_warnsize_setting_deflate(self):
@@ -684,13 +667,7 @@ class TestHttpCompression:
)
def test_download_warnsize_spider_attr_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_download_warnsize_spider_attr("br")
def test_download_warnsize_spider_attr_deflate(self):
@@ -733,13 +710,7 @@ class TestHttpCompression:
)
def test_download_warnsize_request_meta_br(self):
- try:
- try:
- import brotli # noqa: F401
- except ImportError:
- import brotlicffi # noqa: F401
- except ImportError:
- raise SkipTest("no brotli")
+ _skip_if_no_br()
self._test_download_warnsize_request_meta("br")
def test_download_warnsize_request_meta_deflate(self):
@@ -754,3 +725,34 @@ class TestHttpCompression:
except ImportError:
raise SkipTest("no zstd support (zstandard)")
self._test_download_warnsize_request_meta("zstd")
+
+ def _get_truncated_response(self, compression_id):
+ crawler = get_crawler(Spider)
+ spider = crawler._create_spider("scrapytest.org")
+ mw = HttpCompressionMiddleware.from_crawler(crawler)
+ mw.open_spider(spider)
+ response = self._getresponse(compression_id)
+ truncated_body = response.body[: len(response.body) // 2]
+ response = response.replace(body=truncated_body)
+ return mw.process_response(response.request, response, spider)
+
+ def test_process_truncated_response_br(self):
+ _skip_if_no_br()
+ resp = self._get_truncated_response("br")
+ assert resp.body.startswith(b"<!DOCTYPE")
+
+ def test_process_truncated_response_zlibdeflate(self):
+ resp = self._get_truncated_response("zlibdeflate")
+ assert resp.body.startswith(b"<!DOCTYPE")
+
+ def test_process_truncated_response_gzip(self):
+ resp = self._get_truncated_response("gzip")
+ assert resp.body.startswith(b"<!DOCTYPE")
+
+ def test_process_truncated_response_zstd(self):
+ _skip_if_no_zstd()
+ for check_key in FORMAT:
+ if not check_key.startswith("zstd-"):
+ continue
+ resp = self._get_truncated_response(check_key)
+ assert len(resp.body) == 0
Index: scrapy-2.13.3/tests/test_proxy_connect.py
===================================================================
--- scrapy-2.13.3.orig/tests/test_proxy_connect.py
+++ scrapy-2.13.3/tests/test_proxy_connect.py
@@ -62,6 +62,7 @@ def _wrong_credentials(proxy_url):
return urlunsplit(bad_auth_proxy)
+@pytest.mark.requires_mitmproxy
class TestProxyConnect(TestCase):
@classmethod
def setUpClass(cls):
@@ -73,13 +74,7 @@ class TestProxyConnect(TestCase):
cls.mockserver.__exit__(None, None, None)
def setUp(self):
- try:
- import mitmproxy # noqa: F401
- except ImportError:
- pytest.skip("mitmproxy is not installed")
-
self._oldenv = os.environ.copy()
-
self._proxy = MitmProxy()
proxy_url = self._proxy.start()
os.environ["https_proxy"] = proxy_url
Index: scrapy-2.13.3/tox.ini
===================================================================
--- scrapy-2.13.3.orig/tox.ini
+++ scrapy-2.13.3/tox.ini
@@ -112,9 +112,6 @@ deps =
w3lib==1.17.0
zope.interface==5.1.0
{[test-requirements]deps}
-
- # mitmproxy 8.0.0 requires upgrading some of the pinned dependencies
- # above, hence we do not install it in pinned environments at the moment
setenv =
_SCRAPY_PINNED=true
install_command =
@@ -141,8 +138,7 @@ deps =
Twisted[http2]
boto3
bpython # optional for shell wrapper tests
- brotli; implementation_name != "pypy" # optional for HTTP compress downloader middleware tests
- brotlicffi; implementation_name == "pypy" # optional for HTTP compress downloader middleware tests
+ brotli >= 1.2.0 # optional for HTTP compress downloader middleware tests
google-cloud-storage
ipython
robotexclusionrulesparser
@@ -156,9 +152,7 @@ deps =
Pillow==8.0.0
boto3==1.20.0
bpython==0.7.1
- brotli==0.5.2; implementation_name != "pypy"
- brotlicffi==0.8.0; implementation_name == "pypy"
- brotlipy
+ brotli==1.2.0
google-cloud-storage==1.29.0
ipython==2.0.0
robotexclusionrulesparser==1.6.2
@@ -258,7 +252,7 @@ deps =
{[testenv]deps}
botocore>=1.4.87
commands =
- pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy -m requires_botocore}
+ pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_botocore
[testenv:botocore-pinned]
basepython = {[pinned]basepython}
@@ -269,4 +263,17 @@ install_command = {[pinned]install_comma
setenv =
{[pinned]setenv}
commands =
- pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy -m requires_botocore}
+ pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore-pinned.junit.xml -o junit_family=legacy} -m requires_botocore
+
+
+# Run proxy tests that use mitmproxy in a separate env to avoid installing
+# numerous mitmproxy deps in other envs (even in extra-deps), as they can
+# conflict with other deps we want, or don't want, to have installed there.
+
+[testenv:mitmproxy]
+deps =
+ {[testenv]deps}
+ # mitmproxy does not support PyPy
+ mitmproxy; implementation_name != "pypy"
+commands =
+ pytest {posargs:--cov-config=pyproject.toml --cov=scrapy --cov-report=xml --cov-report= tests --junitxml=botocore.junit.xml -o junit_family=legacy} -m requires_mitmproxy

_multibuild Normal file (3 lines)

@@ -0,0 +1,3 @@
<multibuild>
<package>test</package>
</multibuild>

no-dark-mode.patch Normal file (18 lines)

@@ -0,0 +1,18 @@
Index: scrapy-2.13.3/docs/conf.py
===================================================================
--- scrapy-2.13.3.orig/docs/conf.py
+++ scrapy-2.13.3/docs/conf.py
@@ -34,7 +34,7 @@ extensions = [
"sphinx.ext.coverage",
"sphinx.ext.intersphinx",
"sphinx.ext.viewcode",
- "sphinx_rtd_dark_mode",
+ "sphinx_rtd_theme",
]
templates_path = ["_templates"]
@@ -158,4 +158,3 @@ intersphinx_mapping = {
intersphinx_disabled_reftypes: Sequence[str] = []
# -- Other options ------------------------------------------------------------
-default_dark_mode = False

python-Scrapy.changes Normal file (1819 lines)

File diff suppressed because it is too large

python-Scrapy.spec Normal file (182 lines)

@@ -0,0 +1,182 @@
#
# spec file for package python-Scrapy
#
# Copyright (c) 2025 SUSE LLC and contributors
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via https://bugs.opensuse.org/
#
%global flavor @BUILD_FLAVOR@%{nil}
%if "%{flavor}" == "test"
%define psuffix -test
%bcond_without test
%endif
%if "%{flavor}" == ""
%define psuffix %{nil}
%bcond_with test
%endif
%if 0%{?suse_version} > 1500
%bcond_without libalternatives
%else
%bcond_with libalternatives
%endif
%{?sle15_python_module_pythons}
Name: python-Scrapy%{?psuffix}
Version: 2.13.3
Release: 0
Summary: A high-level Python Screen Scraping framework
License: BSD-3-Clause
URL: https://scrapy.org
Source: https://files.pythonhosted.org/packages/source/s/scrapy/scrapy-%{version}.tar.gz
# PATCH-FIX-UPSTREAM gh#scrapy/scrapy#6922
Patch0: remove-hoverxref.patch
# PATCH-FIX-OPENSUSE No sphinx-rtd-dark-mode
Patch1: no-dark-mode.patch
# PATCH-FIX-UPSTREAM CVE-2025-6176.patch gh#scrapy/scrapy#7134
Patch2: CVE-2025-6176.patch
BuildRequires: %{python_module base >= 3.9}
BuildRequires: %{python_module hatchling}
BuildRequires: %{python_module pip}
BuildRequires: %{python_module wheel}
%if %{with test}
# Test requirements:
BuildRequires: %{python_module Scrapy = %{version}}
BuildRequires: %{python_module Brotli >= 1.2.0}
BuildRequires: %{python_module Pillow}
BuildRequires: %{python_module Protego}
BuildRequires: %{python_module PyDispatcher >= 2.0.5}
BuildRequires: %{python_module Twisted >= 18.9.0}
BuildRequires: %{python_module attrs}
BuildRequires: %{python_module botocore >= 1.4.87}
BuildRequires: %{python_module cryptography >= 36.0.0}
BuildRequires: %{python_module cssselect >= 0.9.1}
BuildRequires: %{python_module dbm}
BuildRequires: %{python_module defusedxml >= 0.7.1}
BuildRequires: %{python_module itemadapter >= 0.1.0}
BuildRequires: %{python_module itemloaders >= 1.0.1}
BuildRequires: %{python_module lxml >= 4.4.1}
BuildRequires: %{python_module parsel >= 1.5.0}
BuildRequires: %{python_module pexpect >= 4.8.1}
BuildRequires: %{python_module pyOpenSSL >= 21.0.0}
BuildRequires: %{python_module pyftpdlib >= 1.5.8}
BuildRequires: %{python_module pytest-xdist}
BuildRequires: %{python_module pytest}
BuildRequires: %{python_module queuelib >= 1.4.2}
BuildRequires: %{python_module service_identity >= 18.1.0}
BuildRequires: %{python_module sybil}
BuildRequires: %{python_module testfixtures}
BuildRequires: %{python_module tldextract}
BuildRequires: %{python_module uvloop}
BuildRequires: %{python_module w3lib >= 1.17.0}
BuildRequires: %{python_module zope.interface >= 5.1.0}
%endif
BuildRequires: fdupes
BuildRequires: python-rpm-macros
BuildRequires: python3-Sphinx
BuildRequires: python3-sphinx-notfound-page
BuildRequires: python3-sphinx_rtd_theme
Requires: python-Protego >= 0.1.15
Requires: python-PyDispatcher >= 2.0.5
Requires: python-Twisted >= 18.9.0
Requires: python-cryptography >= 36.0.0
Requires: python-cssselect >= 0.9.1
Requires: python-defusedxml >= 0.7.1
Requires: python-itemadapter >= 0.1.0
Requires: python-itemloaders >= 1.0.1
Requires: python-lxml >= 4.4.1
Requires: python-packaging
Requires: python-parsel >= 1.5.0
Requires: python-pyOpenSSL >= 21.0.0
Requires: python-queuelib >= 1.4.2
Requires: python-service_identity >= 18.1.0
Requires: python-tldextract
Requires: python-w3lib >= 1.17.2
Requires: python-zope.interface >= 5.1.0
BuildArch: noarch
%if %{with libalternatives}
BuildRequires: alts
Requires: alts
%else
Requires(post): update-alternatives
Requires(postun): update-alternatives
%endif
%python_subpackages
%description
Scrapy is a high level scraping and web crawling framework for writing spiders
to crawl and parse web pages for all kinds of purposes, from information
retrieval to monitoring or testing web sites.
%package -n %{name}-doc
Summary: Documentation for %{name}
%description -n %{name}-doc
Provides documentation for %{name}.
%prep
%autosetup -p1 -n scrapy-%{version}
sed -i -e 's:= python:= python3:g' docs/Makefile
%if %{without test}
%build
%pyproject_wheel
pushd docs
%make_build html && rm -r build/html/.buildinfo
popd
%install
%pyproject_install
%python_clone -a %{buildroot}%{_bindir}/scrapy
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%endif
%if %{with test}
%check
# no color in obs chroot console
skiplist="test_pformat"
# no online connection to toscrapy.com
skiplist="$skiplist or CheckCommandTest or test_file_path"
# Flaky test gh#scrapy/scrapy#5703
skiplist="$skiplist or test_start_requests_laziness"
# Fails on 32 bit arches
skiplist="$skiplist or test_queue_push_pop_priorities"
%{pytest -x \
-k "not (${skiplist})" \
-W ignore::DeprecationWarning \
tests}
%endif
%if %{without test}
%pre
# If libalternatives is used: Removing old update-alternatives entries.
%python_libalternatives_reset_alternative scrapy
%post
%python_install_alternative scrapy
%postun
%python_uninstall_alternative scrapy
%files %{python_files}
%license LICENSE
%doc AUTHORS README.rst
%{python_sitelib}/scrapy
%{python_sitelib}/[Ss]crapy-%{version}.dist-info
%python_alternative %{_bindir}/scrapy
%files -n %{name}-doc
%doc docs/build/html
%endif
%changelog

remove-hoverxref.patch Normal file (56 lines)

@@ -0,0 +1,56 @@
From 549730c23592479f200f3c1f941c59f68c510ff5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io>
Date: Sat, 28 Jun 2025 12:32:55 +0200
Subject: [PATCH] Remove the deprecated sphinx-hoverxref
---
docs/conf.py | 20 +-------------------
docs/requirements.txt | 1 -
2 files changed, 1 insertion(+), 20 deletions(-)
diff --git a/docs/conf.py b/docs/conf.py
index 493a6297624..0345ec69543 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,7 +26,6 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
- "hoverxref.extension",
"notfound.extension",
"scrapydocs",
"sphinx.ext.autodoc",
@@ -157,22 +156,5 @@
}
intersphinx_disabled_reftypes: Sequence[str] = []
-
-# -- Options for sphinx-hoverxref extension ----------------------------------
-# https://sphinx-hoverxref.readthedocs.io/en/latest/configuration.html
-
-hoverxref_auto_ref = True
-hoverxref_role_types = {
- "class": "tooltip",
- "command": "tooltip",
- "confval": "tooltip",
- "hoverxref": "tooltip",
- "mod": "tooltip",
- "ref": "tooltip",
- "reqmeta": "tooltip",
- "setting": "tooltip",
- "signal": "tooltip",
-}
-hoverxref_roles = ["command", "reqmeta", "setting", "signal"]
-
+# -- Other options ------------------------------------------------------------
default_dark_mode = False
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 103fb08d667..4b382b11eb9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,4 @@
sphinx==8.1.3
-sphinx-hoverxref==1.4.2
sphinx-notfound-page==1.0.4
sphinx-rtd-theme==3.0.2
sphinx-rtd-dark-mode==1.3.0

scrapy-2.13.3.tar.gz Normal file (3 lines)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf17588c10e46a9d70c49a05380b749e3c7fba58204a367a5747ce6da2bd204d
size 1220051