14
0
forked from pool/python-Scrapy

- Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064

- Update to 2.11.0:
  - Spiders can now modify settings in their from_crawler methods,
    e.g. based on spider arguments.
  - Periodic logging of stats.
  - Bug fixes.
- 2.10.0:
  - Added Python 3.12 support, dropped Python 3.7 support.
  - The new add-ons framework simplifies configuring 3rd-party
    components that support it.
  - Exceptions to retry can now be configured.
  - Many fixes and improvements for feed exports.
- 2.9.0:
  - Per-domain download settings.
  - Compatibility with new cryptography and new parsel.
  - JMESPath selectors from the new parsel.
  - Bug fixes.
- 2.8.0:
  - This is a maintenance release, with minor features, bug fixes, and
    cleanups.

OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-Scrapy?expand=0&rev=34
This commit is contained in:
2024-01-10 07:53:57 +00:00
committed by Git OBS Bridge
parent 47fd8f7029
commit f93a35cd30
5 changed files with 303 additions and 25 deletions

3
Scrapy-2.11.0.tar.gz Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3cbdedce0c3f0e0482d61be2d7458683be7cd7cf14b0ee6adfbaddb80f5b36a5
size 1171092

View File

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30fa408353d24b1df979df2ea4afbd19b4ae02fb2207f218d246332f1e1cf14e
size 1131084

View File

@@ -1,3 +1,27 @@
-------------------------------------------------------------------
Wed Jan 10 07:50:52 UTC 2024 - Daniel Garcia <daniel.garcia@suse.com>
- Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064
- Update to 2.11.0:
  - Spiders can now modify settings in their from_crawler methods,
    e.g. based on spider arguments.
  - Periodic logging of stats.
  - Bug fixes.
- 2.10.0:
  - Added Python 3.12 support, dropped Python 3.7 support.
  - The new add-ons framework simplifies configuring 3rd-party
    components that support it.
  - Exceptions to retry can now be configured.
  - Many fixes and improvements for feed exports.
- 2.9.0:
  - Per-domain download settings.
  - Compatibility with new cryptography and new parsel.
  - JMESPath selectors from the new parsel.
  - Bug fixes.
- 2.8.0:
  - This is a maintenance release, with minor features, bug fixes, and
    cleanups.
------------------------------------------------------------------- -------------------------------------------------------------------
Mon Nov 7 20:35:15 UTC 2022 - Yogalakshmi Arunachalam <yarunachalam@suse.com> Mon Nov 7 20:35:15 UTC 2022 - Yogalakshmi Arunachalam <yarunachalam@suse.com>

View File

@@ -1,7 +1,7 @@
# #
# spec file for package python-Scrapy # spec file for package python-Scrapy
# #
# Copyright (c) 2022 SUSE LLC # Copyright (c) 2024 SUSE LLC
# #
# All modifications and additions to the file contributed by third parties # All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed # remain the property of their copyright owners, unless otherwise agreed
@@ -16,62 +16,63 @@
# #
%{?!python_module:%define python_module() python3-%{**}}
%define skip_python2 1
Name: python-Scrapy Name: python-Scrapy
Version: 2.7.1 Version: 2.11.0
Release: 0 Release: 0
Summary: A high-level Python Screen Scraping framework Summary: A high-level Python Screen Scraping framework
License: BSD-3-Clause License: BSD-3-Clause
Group: Development/Languages/Python Group: Development/Languages/Python
URL: https://scrapy.org URL: https://scrapy.org
Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz
# PATCH-FIX-UPSTREAM twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064
Patch1: twisted-23.8.0-compat.patch
BuildRequires: %{python_module Pillow} BuildRequires: %{python_module Pillow}
BuildRequires: %{python_module Protego >= 0.1.15} BuildRequires: %{python_module Protego >= 0.1.15}
BuildRequires: %{python_module PyDispatcher >= 2.0.5} BuildRequires: %{python_module PyDispatcher >= 2.0.5}
BuildRequires: %{python_module Twisted >= 17.9.0} BuildRequires: %{python_module Twisted >= 18.9.0}
BuildRequires: %{python_module botocore} BuildRequires: %{python_module attrs}
BuildRequires: %{python_module cryptography >= 2.0} BuildRequires: %{python_module botocore >= 1.4.87}
BuildRequires: %{python_module cryptography >= 36.0.0}
BuildRequires: %{python_module cssselect >= 0.9.1} BuildRequires: %{python_module cssselect >= 0.9.1}
BuildRequires: %{python_module dbm} BuildRequires: %{python_module dbm}
BuildRequires: %{python_module itemadapter >= 0.1.0} BuildRequires: %{python_module itemadapter >= 0.1.0}
BuildRequires: %{python_module itemloaders >= 1.0.1} BuildRequires: %{python_module itemloaders >= 1.0.1}
BuildRequires: %{python_module jmespath} BuildRequires: %{python_module lxml >= 4.4.1}
BuildRequires: %{python_module lxml >= 3.5.0}
BuildRequires: %{python_module parsel >= 1.5.0} BuildRequires: %{python_module parsel >= 1.5.0}
BuildRequires: %{python_module pyOpenSSL >= 16.2.0} BuildRequires: %{python_module pexpect >= 4.8.0}
BuildRequires: %{python_module pyOpenSSL >= 21.0.0}
BuildRequires: %{python_module pyftpdlib} BuildRequires: %{python_module pyftpdlib}
BuildRequires: %{python_module pytest-xdist} BuildRequires: %{python_module pytest-xdist}
BuildRequires: %{python_module pytest} BuildRequires: %{python_module pytest}
BuildRequires: %{python_module queuelib >= 1.4.2} BuildRequires: %{python_module queuelib >= 1.4.2}
BuildRequires: %{python_module service_identity >= 16.0.0} BuildRequires: %{python_module service_identity >= 18.1.0}
BuildRequires: %{python_module setuptools} BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module sybil} BuildRequires: %{python_module sybil}
BuildRequires: %{python_module testfixtures >= 6.0.0} BuildRequires: %{python_module testfixtures}
BuildRequires: %{python_module tldextract} BuildRequires: %{python_module tldextract}
BuildRequires: %{python_module uvloop} BuildRequires: %{python_module uvloop}
BuildRequires: %{python_module w3lib >= 1.17.0} BuildRequires: %{python_module w3lib >= 1.17.0}
BuildRequires: %{python_module zope.interface >= 4.1.3} BuildRequires: %{python_module zope.interface >= 5.1.0}
BuildRequires: fdupes BuildRequires: fdupes
BuildRequires: python-rpm-macros BuildRequires: python-rpm-macros
BuildRequires: python3-Sphinx BuildRequires: python3-Sphinx
BuildRequires: (python3-dataclasses if python3-base < 3.7) BuildRequires: (python3-dataclasses if python3-base < 3.7)
Requires: python-Protego >= 0.1.15 Requires: python-Protego >= 0.1.15
Requires: python-PyDispatcher >= 2.0.5 Requires: python-PyDispatcher >= 2.0.5
Requires: python-Twisted >= 17.9.0 Requires: python-Twisted >= 18.9.0
Requires: python-cryptography >= 2.0 Requires: python-cryptography >= 36.0.0
Requires: python-cssselect >= 0.9.1 Requires: python-cssselect >= 0.9.1
Requires: python-itemadapter >= 0.1.0 Requires: python-itemadapter >= 0.1.0
Requires: python-itemloaders >= 1.0.1 Requires: python-itemloaders >= 1.0.1
Requires: python-lxml >= 3.5.0 Requires: python-lxml >= 4.4.1
Requires: python-parsel >= 1.5.0 Requires: python-parsel >= 1.5.0
Requires: python-pyOpenSSL >= 16.2.0 Requires: python-pyOpenSSL >= 21.0.0
Requires: python-queuelib >= 1.4.2 Requires: python-queuelib >= 1.4.2
Requires: python-service_identity >= 16.0.0 Requires: python-service_identity >= 18.1.0
Requires: python-setuptools Requires: python-setuptools
Requires: python-tldextract Requires: python-tldextract
Requires: python-w3lib >= 1.17.2 Requires: python-w3lib >= 1.17.2
Requires: python-zope.interface >= 4.1.3 Requires: python-zope.interface >= 5.1.0
Requires(post): update-alternatives Requires(post): update-alternatives
Requires(postun):update-alternatives Requires(postun):update-alternatives
BuildArch: noarch BuildArch: noarch
@@ -90,8 +91,7 @@ Group: Documentation/HTML
Provides documentation for %{name}. Provides documentation for %{name}.
%prep %prep
%setup -n Scrapy-%{version} %autosetup -p1 -n Scrapy-%{version}
%autopatch -p1
sed -i -e 's:= python:= python3:g' docs/Makefile sed -i -e 's:= python:= python3:g' docs/Makefile
@@ -111,7 +111,7 @@ popd
skiplist="test_pformat" skiplist="test_pformat"
# no online connection to toscrapy.com # no online connection to toscrapy.com
skiplist="$skiplist or CheckCommandTest" skiplist="$skiplist or CheckCommandTest"
%{pytest \ %{pytest -x \
-k "not (${skiplist})" \ -k "not (${skiplist})" \
-W ignore::DeprecationWarning \ -W ignore::DeprecationWarning \
tests} tests}

254
twisted-23.8.0-compat.patch Normal file
View File

@@ -0,0 +1,254 @@
Index: Scrapy-2.11.0/scrapy/crawler.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/crawler.py
+++ Scrapy-2.11.0/scrapy/crawler.py
@@ -404,8 +404,8 @@ class CrawlerProcess(CrawlerRunner):
:param bool stop_after_crawl: stop or not the reactor when all
crawlers have finished
- :param bool install_signal_handlers: whether to install the shutdown
- handlers (default: True)
+ :param bool install_signal_handlers: whether to install the OS signal
+ handlers from Twisted and Scrapy (default: True)
"""
from twisted.internet import reactor
@@ -416,15 +416,17 @@ class CrawlerProcess(CrawlerRunner):
return
d.addBoth(self._stop_reactor)
- if install_signal_handlers:
- install_shutdown_handlers(self._signal_shutdown)
resolver_class = load_object(self.settings["DNS_RESOLVER"])
resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
resolver.install_on_reactor()
tp = reactor.getThreadPool()
tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
reactor.addSystemEventTrigger("before", "shutdown", self.stop)
- reactor.run(installSignalHandlers=False) # blocking call
+ if install_signal_handlers:
+ reactor.addSystemEventTrigger(
+ "after", "startup", install_shutdown_handlers, self._signal_shutdown
+ )
+ reactor.run(installSignalHandlers=install_signal_handlers) # blocking call
def _graceful_stop_reactor(self) -> Deferred:
d = self.stop()
Index: Scrapy-2.11.0/scrapy/utils/ossignal.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/ossignal.py
+++ Scrapy-2.11.0/scrapy/utils/ossignal.py
@@ -19,13 +19,10 @@ def install_shutdown_handlers(
function: SignalHandlerT, override_sigint: bool = True
) -> None:
"""Install the given function as a signal handler for all common shutdown
- signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
- SIGINT handler won't be install if there is already a handler in place
- (e.g. Pdb)
+ signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
+ SIGINT handler won't be installed if there is already a handler in place
+ (e.g. Pdb)
"""
- from twisted.internet import reactor
-
- reactor._handleSignals()
signal.signal(signal.SIGTERM, function)
if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
signal.signal(signal.SIGINT, function)
Index: Scrapy-2.11.0/scrapy/utils/testproc.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/testproc.py
+++ Scrapy-2.11.0/scrapy/utils/testproc.py
@@ -2,7 +2,7 @@ from __future__ import annotations
import os
import sys
-from typing import Iterable, Optional, Tuple, cast
+from typing import Iterable, List, Optional, Tuple, cast
from twisted.internet.defer import Deferred
from twisted.internet.error import ProcessTerminated
@@ -26,14 +26,15 @@ class ProcessTest:
env = os.environ.copy()
if settings is not None:
env["SCRAPY_SETTINGS_MODULE"] = settings
+ assert self.command
cmd = self.prefix + [self.command] + list(args)
pp = TestProcessProtocol()
- pp.deferred.addBoth(self._process_finished, cmd, check_code)
+ pp.deferred.addCallback(self._process_finished, cmd, check_code)
reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
return pp.deferred
def _process_finished(
- self, pp: TestProcessProtocol, cmd: str, check_code: bool
+ self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
) -> Tuple[int, bytes, bytes]:
if pp.exitcode and check_code:
msg = f"process {cmd} exit with code {pp.exitcode}"
Index: Scrapy-2.11.0/setup.py
===================================================================
--- Scrapy-2.11.0.orig/setup.py
+++ Scrapy-2.11.0/setup.py
@@ -6,8 +6,7 @@ version = (Path(__file__).parent / "scra
install_requires = [
- # 23.8.0 incompatibility: https://github.com/scrapy/scrapy/issues/6024
- "Twisted>=18.9.0,<23.8.0",
+ "Twisted>=18.9.0",
"cryptography>=36.0.0",
"cssselect>=0.9.1",
"itemloaders>=1.0.1",
Index: Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
===================================================================
--- /dev/null
+++ Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
@@ -0,0 +1,24 @@
+from twisted.internet.defer import Deferred
+
+import scrapy
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.defer import maybe_deferred_to_future
+
+
+class SleepingSpider(scrapy.Spider):
+ name = "sleeping"
+
+ start_urls = ["data:,;"]
+
+ async def parse(self, response):
+ from twisted.internet import reactor
+
+ d = Deferred()
+ reactor.callLater(3, d.callback, None)
+ await maybe_deferred_to_future(d)
+
+
+process = CrawlerProcess(settings={})
+
+process.crawl(SleepingSpider)
+process.start()
Index: Scrapy-2.11.0/tests/requirements.txt
===================================================================
--- Scrapy-2.11.0.orig/tests/requirements.txt
+++ Scrapy-2.11.0/tests/requirements.txt
@@ -1,5 +1,6 @@
# Tests requirements
attrs
+pexpect >= 4.8.0
# https://github.com/giampaolo/pyftpdlib/issues/560
pyftpdlib; python_version < "3.12"
pytest
Index: Scrapy-2.11.0/tests/test_command_shell.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_command_shell.py
+++ Scrapy-2.11.0/tests/test_command_shell.py
@@ -1,11 +1,15 @@
+import sys
+from io import BytesIO
from pathlib import Path
+from pexpect.popen_spawn import PopenSpawn
from twisted.internet import defer
from twisted.trial import unittest
from scrapy.utils.testproc import ProcessTest
from scrapy.utils.testsite import SiteTest
from tests import NON_EXISTING_RESOLVABLE, tests_datadir
+from tests.mockserver import MockServer
class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@@ -133,3 +137,25 @@ class ShellTest(ProcessTest, SiteTest, u
args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
_, _, err = yield self.execute(args, check_code=True)
self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
+
+
+class InteractiveShellTest(unittest.TestCase):
+ def test_fetch(self):
+ args = (
+ sys.executable,
+ "-m",
+ "scrapy.cmdline",
+ "shell",
+ )
+ logfile = BytesIO()
+ p = PopenSpawn(args, timeout=5)
+ p.logfile_read = logfile
+ p.expect_exact("Available Scrapy objects")
+ with MockServer() as mockserver:
+ p.sendline(f"fetch('{mockserver.url('/')}')")
+ p.sendline("type(response)")
+ p.expect_exact("HtmlResponse")
+ p.sendeof()
+ p.wait()
+ logfile.seek(0)
+ self.assertNotIn("Traceback", logfile.read().decode())
Index: Scrapy-2.11.0/tests/test_crawler.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_crawler.py
+++ Scrapy-2.11.0/tests/test_crawler.py
@@ -1,13 +1,16 @@
import logging
import os
import platform
+import signal
import subprocess
import sys
import warnings
from pathlib import Path
+from typing import List
import pytest
from packaging.version import parse as parse_version
+from pexpect.popen_spawn import PopenSpawn
from pytest import mark, raises
from twisted.internet import defer
from twisted.trial import unittest
@@ -289,9 +292,12 @@ class ScriptRunnerMixin:
script_dir: Path
cwd = os.getcwd()
- def run_script(self, script_name: str, *script_args):
+ def get_script_args(self, script_name: str, *script_args: str) -> List[str]:
script_path = self.script_dir / script_name
- args = [sys.executable, str(script_path)] + list(script_args)
+ return [sys.executable, str(script_path)] + list(script_args)
+
+ def run_script(self, script_name: str, *script_args: str) -> str:
+ args = self.get_script_args(script_name, *script_args)
p = subprocess.Popen(
args,
env=get_mockserver_env(),
@@ -517,6 +523,29 @@ class CrawlerProcessSubprocess(ScriptRun
self.assertIn("Spider closed (finished)", log)
self.assertIn("The value of FOO is 42", log)
+ def test_shutdown_graceful(self):
+ sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+ args = self.get_script_args("sleeping.py")
+ p = PopenSpawn(args, timeout=5)
+ p.expect_exact("Spider opened")
+ p.expect_exact("Crawled (200)")
+ p.kill(sig)
+ p.expect_exact("shutting down gracefully")
+ p.expect_exact("Spider closed (shutdown)")
+ p.wait()
+
+ def test_shutdown_forced(self):
+ sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+ args = self.get_script_args("sleeping.py")
+ p = PopenSpawn(args, timeout=5)
+ p.expect_exact("Spider opened")
+ p.expect_exact("Crawled (200)")
+ p.kill(sig)
+ p.expect_exact("shutting down gracefully")
+ p.kill(sig)
+ p.expect_exact("forcing unclean shutdown")
+ p.wait()
+
class CrawlerRunnerSubprocess(ScriptRunnerMixin, unittest.TestCase):
script_dir = Path(__file__).parent.resolve() / "CrawlerRunner"