forked from pool/python-Scrapy
- Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064
- Update to 2.11.0:
  - Spiders can now modify settings in their from_crawler methods,
    e.g. based on spider arguments (see the sketch after this list).
  - Periodic logging of stats.
  - Bug fixes.
- 2.10.0:
  - Added Python 3.12 support, dropped Python 3.7 support.
  - The new add-ons framework simplifies configuring 3rd-party
    components that support it.
  - Exceptions to retry can now be configured (settings sketch below).
  - Many fixes and improvements for feed exports.
- 2.9.0:
  - Per-domain download settings (settings sketch below).
  - Compatibility with new cryptography and new parsel.
  - JMESPath selectors from the new parsel (JSON sketch below).
  - Bug fixes.
- 2.8.0:
  - This is a maintenance release, with minor features, bug fixes, and
    cleanups.
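
As a rough illustration of the from_crawler change above (a minimal
sketch, not code from this package; the spider name, argument, and
setting value are made up):

  import scrapy

  class TunedSpider(scrapy.Spider):
      name = "tuned"  # hypothetical spider, for illustration only

      @classmethod
      def from_crawler(cls, crawler, *args, **kwargs):
          spider = super().from_crawler(crawler, *args, **kwargs)
          # Since Scrapy 2.11 the settings are still mutable at this point,
          # so they can be tuned from spider arguments, e.g.
          # scrapy crawl tuned -a fast=1
          if getattr(spider, "fast", None):
              crawler.settings.set("CONCURRENT_REQUESTS", 32, priority="spider")
          return spider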
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-Scrapy?expand=0&rev=34
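
The retry-exceptions (2.10) and per-domain download settings (2.9)
bullets correspond to the upstream RETRY_EXCEPTIONS and DOWNLOAD_SLOTS
settings; a minimal settings.py fragment (the domain and values below
are invented for illustration):

  # Retry only on these exceptions (dotted paths or exception classes).
  RETRY_EXCEPTIONS = [
      "twisted.internet.error.TimeoutError",
      "scrapy.core.downloader.handlers.http11.TunnelError",
  ]
  # Per-domain concurrency/delay, keyed by download slot (domain).
  DOWNLOAD_SLOTS = {
      "books.toscrape.com": {"concurrency": 1, "delay": 2, "randomize_delay": False},
  }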
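
The JMESPath selectors come straight from parsel >= 1.8, which Scrapy
pulls in since 2.9; a small sketch (the JSON document is made up):

  from parsel import Selector

  sel = Selector(text='{"users": [{"name": "alice"}, {"name": "bob"}]}', type="json")
  print(sel.jmespath("users[*].name").getall())  # ['alice', 'bob']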
Scrapy-2.11.0.tar.gz (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3cbdedce0c3f0e0482d61be2d7458683be7cd7cf14b0ee6adfbaddb80f5b36a5
size 1171092

Scrapy-2.7.1.tar.gz (deleted file, 3 lines; name inferred from the spec's previous Version)

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30fa408353d24b1df979df2ea4afbd19b4ae02fb2207f218d246332f1e1cf14e
size 1131084
python-Scrapy.changes

@@ -1,3 +1,27 @@
-------------------------------------------------------------------
Wed Jan 10 07:50:52 UTC 2024 - Daniel Garcia <daniel.garcia@suse.com>

- Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064
- Update to 2.11.0:
  - Spiders can now modify settings in their from_crawler methods,
    e.g. based on spider arguments.
  - Periodic logging of stats.
  - Bug fixes.
- 2.10.0:
  - Added Python 3.12 support, dropped Python 3.7 support.
  - The new add-ons framework simplifies configuring 3rd-party
    components that support it.
  - Exceptions to retry can now be configured.
  - Many fixes and improvements for feed exports.
- 2.9.0:
  - Per-domain download settings.
  - Compatibility with new cryptography and new parsel.
  - JMESPath selectors from the new parsel.
  - Bug fixes.
- 2.8.0:
  - This is a maintenance release, with minor features, bug fixes, and
    cleanups.

-------------------------------------------------------------------
Mon Nov  7 20:35:15 UTC 2022 - Yogalakshmi Arunachalam <yarunachalam@suse.com>

python-Scrapy.spec

@@ -1,7 +1,7 @@
#
# spec file for package python-Scrapy
#
# Copyright (c) 2022 SUSE LLC
# Copyright (c) 2024 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -16,62 +16,63 @@
#


%{?!python_module:%define python_module() python3-%{**}}
%define skip_python2 1
Name:           python-Scrapy
Version:        2.7.1
Version:        2.11.0
Release:        0
Summary:        A high-level Python Screen Scraping framework
License:        BSD-3-Clause
Group:          Development/Languages/Python
URL:            https://scrapy.org
Source:         https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz
# PATCH-FIX-UPSTREAM twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064
Patch1:         twisted-23.8.0-compat.patch
BuildRequires:  %{python_module Pillow}
BuildRequires:  %{python_module Protego >= 0.1.15}
BuildRequires:  %{python_module PyDispatcher >= 2.0.5}
BuildRequires:  %{python_module Twisted >= 17.9.0}
BuildRequires:  %{python_module botocore}
BuildRequires:  %{python_module cryptography >= 2.0}
BuildRequires:  %{python_module Twisted >= 18.9.0}
BuildRequires:  %{python_module attrs}
BuildRequires:  %{python_module botocore >= 1.4.87}
BuildRequires:  %{python_module cryptography >= 36.0.0}
BuildRequires:  %{python_module cssselect >= 0.9.1}
BuildRequires:  %{python_module dbm}
BuildRequires:  %{python_module itemadapter >= 0.1.0}
BuildRequires:  %{python_module itemloaders >= 1.0.1}
BuildRequires:  %{python_module jmespath}
BuildRequires:  %{python_module lxml >= 3.5.0}
BuildRequires:  %{python_module lxml >= 4.4.1}
BuildRequires:  %{python_module parsel >= 1.5.0}
BuildRequires:  %{python_module pyOpenSSL >= 16.2.0}
BuildRequires:  %{python_module pexpect >= 4.8.1}
BuildRequires:  %{python_module pyOpenSSL >= 21.0.0}
BuildRequires:  %{python_module pyftpdlib}
BuildRequires:  %{python_module pytest-xdist}
BuildRequires:  %{python_module pytest}
BuildRequires:  %{python_module queuelib >= 1.4.2}
BuildRequires:  %{python_module service_identity >= 16.0.0}
BuildRequires:  %{python_module service_identity >= 18.1.0}
BuildRequires:  %{python_module setuptools}
BuildRequires:  %{python_module sybil}
BuildRequires:  %{python_module testfixtures >= 6.0.0}
BuildRequires:  %{python_module testfixtures}
BuildRequires:  %{python_module tldextract}
BuildRequires:  %{python_module uvloop}
BuildRequires:  %{python_module w3lib >= 1.17.0}
BuildRequires:  %{python_module zope.interface >= 4.1.3}
BuildRequires:  %{python_module zope.interface >= 5.1.0}
BuildRequires:  fdupes
BuildRequires:  python-rpm-macros
BuildRequires:  python3-Sphinx
BuildRequires:  (python3-dataclasses if python3-base < 3.7)
Requires:       python-Protego >= 0.1.15
Requires:       python-PyDispatcher >= 2.0.5
Requires:       python-Twisted >= 17.9.0
Requires:       python-cryptography >= 2.0
Requires:       python-Twisted >= 18.9.0
Requires:       python-cryptography >= 36.0.0
Requires:       python-cssselect >= 0.9.1
Requires:       python-itemadapter >= 0.1.0
Requires:       python-itemloaders >= 1.0.1
Requires:       python-lxml >= 3.5.0
Requires:       python-lxml >= 4.4.1
Requires:       python-parsel >= 1.5.0
Requires:       python-pyOpenSSL >= 16.2.0
Requires:       python-pyOpenSSL >= 21.0.0
Requires:       python-queuelib >= 1.4.2
Requires:       python-service_identity >= 16.0.0
Requires:       python-service_identity >= 18.1.0
Requires:       python-setuptools
Requires:       python-tldextract
Requires:       python-w3lib >= 1.17.2
Requires:       python-zope.interface >= 4.1.3
Requires:       python-zope.interface >= 5.1.0
Requires(post): update-alternatives
Requires(postun):update-alternatives
BuildArch:      noarch
@@ -90,8 +91,7 @@ Group:          Documentation/HTML
Provides documentation for %{name}.

%prep
%setup -n Scrapy-%{version}
%autopatch -p1
%autosetup -p1 -n Scrapy-%{version}

sed -i -e 's:= python:= python3:g' docs/Makefile

@@ -111,7 +111,7 @@ popd
skiplist="test_pformat"
# no online connection to toscrapy.com
skiplist="$skiplist or CheckCommandTest"
%{pytest \
%{pytest -x \
    -k "not (${skiplist})" \
    -W ignore::DeprecationWarning \
    tests}

twisted-23.8.0-compat.patch (new file, 254 lines)

@@ -0,0 +1,254 @@
Index: Scrapy-2.11.0/scrapy/crawler.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/crawler.py
+++ Scrapy-2.11.0/scrapy/crawler.py
@@ -404,8 +404,8 @@ class CrawlerProcess(CrawlerRunner):
         :param bool stop_after_crawl: stop or not the reactor when all
             crawlers have finished

-        :param bool install_signal_handlers: whether to install the shutdown
-            handlers (default: True)
+        :param bool install_signal_handlers: whether to install the OS signal
+            handlers from Twisted and Scrapy (default: True)
         """
         from twisted.internet import reactor

@@ -416,15 +416,17 @@ class CrawlerProcess(CrawlerRunner):
                 return
             d.addBoth(self._stop_reactor)

-        if install_signal_handlers:
-            install_shutdown_handlers(self._signal_shutdown)
         resolver_class = load_object(self.settings["DNS_RESOLVER"])
         resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
         resolver.install_on_reactor()
         tp = reactor.getThreadPool()
         tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
         reactor.addSystemEventTrigger("before", "shutdown", self.stop)
-        reactor.run(installSignalHandlers=False)  # blocking call
+        if install_signal_handlers:
+            reactor.addSystemEventTrigger(
+                "after", "startup", install_shutdown_handlers, self._signal_shutdown
+            )
+        reactor.run(installSignalHandlers=install_signal_handlers)  # blocking call

     def _graceful_stop_reactor(self) -> Deferred:
         d = self.stop()
Index: Scrapy-2.11.0/scrapy/utils/ossignal.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/ossignal.py
+++ Scrapy-2.11.0/scrapy/utils/ossignal.py
@@ -19,13 +19,10 @@ def install_shutdown_handlers(
     function: SignalHandlerT, override_sigint: bool = True
 ) -> None:
     """Install the given function as a signal handler for all common shutdown
-    signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
-    SIGINT handler won't be install if there is already a handler in place
-    (e.g.  Pdb)
+    signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
+    SIGINT handler won't be installed if there is already a handler in place
+    (e.g. Pdb)
     """
-    from twisted.internet import reactor
-
-    reactor._handleSignals()
     signal.signal(signal.SIGTERM, function)
     if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
         signal.signal(signal.SIGINT, function)
Index: Scrapy-2.11.0/scrapy/utils/testproc.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/testproc.py
+++ Scrapy-2.11.0/scrapy/utils/testproc.py
@@ -2,7 +2,7 @@ from __future__ import annotations

 import os
 import sys
-from typing import Iterable, Optional, Tuple, cast
+from typing import Iterable, List, Optional, Tuple, cast

 from twisted.internet.defer import Deferred
 from twisted.internet.error import ProcessTerminated
@@ -26,14 +26,15 @@ class ProcessTest:
         env = os.environ.copy()
         if settings is not None:
             env["SCRAPY_SETTINGS_MODULE"] = settings
+        assert self.command
         cmd = self.prefix + [self.command] + list(args)
         pp = TestProcessProtocol()
-        pp.deferred.addBoth(self._process_finished, cmd, check_code)
+        pp.deferred.addCallback(self._process_finished, cmd, check_code)
         reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
         return pp.deferred

     def _process_finished(
-        self, pp: TestProcessProtocol, cmd: str, check_code: bool
+        self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
     ) -> Tuple[int, bytes, bytes]:
         if pp.exitcode and check_code:
             msg = f"process {cmd} exit with code {pp.exitcode}"
Index: Scrapy-2.11.0/setup.py
===================================================================
--- Scrapy-2.11.0.orig/setup.py
+++ Scrapy-2.11.0/setup.py
@@ -6,8 +6,7 @@ version = (Path(__file__).parent / "scra


 install_requires = [
-    # 23.8.0 incompatibility: https://github.com/scrapy/scrapy/issues/6024
-    "Twisted>=18.9.0,<23.8.0",
+    "Twisted>=18.9.0",
     "cryptography>=36.0.0",
     "cssselect>=0.9.1",
     "itemloaders>=1.0.1",
Index: Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
===================================================================
--- /dev/null
+++ Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
@@ -0,0 +1,24 @@
+from twisted.internet.defer import Deferred
+
+import scrapy
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.defer import maybe_deferred_to_future
+
+
+class SleepingSpider(scrapy.Spider):
+    name = "sleeping"
+
+    start_urls = ["data:,;"]
+
+    async def parse(self, response):
+        from twisted.internet import reactor
+
+        d = Deferred()
+        reactor.callLater(3, d.callback, None)
+        await maybe_deferred_to_future(d)
+
+
+process = CrawlerProcess(settings={})
+
+process.crawl(SleepingSpider)
+process.start()
Index: Scrapy-2.11.0/tests/requirements.txt
===================================================================
--- Scrapy-2.11.0.orig/tests/requirements.txt
+++ Scrapy-2.11.0/tests/requirements.txt
@@ -1,5 +1,6 @@
 # Tests requirements
 attrs
+pexpect >= 4.8.0
 # https://github.com/giampaolo/pyftpdlib/issues/560
 pyftpdlib; python_version < "3.12"
 pytest
Index: Scrapy-2.11.0/tests/test_command_shell.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_command_shell.py
+++ Scrapy-2.11.0/tests/test_command_shell.py
@@ -1,11 +1,15 @@
+import sys
+from io import BytesIO
 from pathlib import Path

+from pexpect.popen_spawn import PopenSpawn
 from twisted.internet import defer
 from twisted.trial import unittest

 from scrapy.utils.testproc import ProcessTest
 from scrapy.utils.testsite import SiteTest
 from tests import NON_EXISTING_RESOLVABLE, tests_datadir
+from tests.mockserver import MockServer


 class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@@ -133,3 +137,25 @@ class ShellTest(ProcessTest, SiteTest, u
         args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
         _, _, err = yield self.execute(args, check_code=True)
         self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
+
+
+class InteractiveShellTest(unittest.TestCase):
+    def test_fetch(self):
+        args = (
+            sys.executable,
+            "-m",
+            "scrapy.cmdline",
+            "shell",
+        )
+        logfile = BytesIO()
+        p = PopenSpawn(args, timeout=5)
+        p.logfile_read = logfile
+        p.expect_exact("Available Scrapy objects")
+        with MockServer() as mockserver:
+            p.sendline(f"fetch('{mockserver.url('/')}')")
+            p.sendline("type(response)")
+            p.expect_exact("HtmlResponse")
+        p.sendeof()
+        p.wait()
+        logfile.seek(0)
+        self.assertNotIn("Traceback", logfile.read().decode())
Index: Scrapy-2.11.0/tests/test_crawler.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_crawler.py
+++ Scrapy-2.11.0/tests/test_crawler.py
@@ -1,13 +1,16 @@
 import logging
 import os
 import platform
+import signal
 import subprocess
 import sys
 import warnings
 from pathlib import Path
+from typing import List

 import pytest
 from packaging.version import parse as parse_version
+from pexpect.popen_spawn import PopenSpawn
 from pytest import mark, raises
 from twisted.internet import defer
 from twisted.trial import unittest
@@ -289,9 +292,12 @@ class ScriptRunnerMixin:
     script_dir: Path
     cwd = os.getcwd()

-    def run_script(self, script_name: str, *script_args):
+    def get_script_args(self, script_name: str, *script_args: str) -> List[str]:
         script_path = self.script_dir / script_name
-        args = [sys.executable, str(script_path)] + list(script_args)
+        return [sys.executable, str(script_path)] + list(script_args)
+
+    def run_script(self, script_name: str, *script_args: str) -> str:
+        args = self.get_script_args(script_name, *script_args)
         p = subprocess.Popen(
             args,
             env=get_mockserver_env(),
@@ -517,6 +523,29 @@ class CrawlerProcessSubprocess(ScriptRun
         self.assertIn("Spider closed (finished)", log)
         self.assertIn("The value of FOO is 42", log)

+    def test_shutdown_graceful(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.expect_exact("Spider closed (shutdown)")
+        p.wait()
+
+    def test_shutdown_forced(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.kill(sig)
+        p.expect_exact("forcing unclean shutdown")
+        p.wait()
+

 class CrawlerRunnerSubprocess(ScriptRunnerMixin, unittest.TestCase):
     script_dir = Path(__file__).parent.resolve() / "CrawlerRunner"