forked from pool/python-Scrapy

Accepting request 958587 from devel:languages:python

- Update to v2.6.1
  * Security fixes for cookie handling (CVE-2022-0577 aka
    bsc#1196638, GHSA-mfjm-vh54-3f96)
  * Python 3.10 support
  * asyncio support is no longer considered experimental, and works
    out-of-the-box on Windows regardless of your Python version
  * Feed exports now support pathlib.Path output paths and per-feed
    item filtering and post-processing
- Remove unnecessary patches:
  - remove-h2-version-restriction.patch
  - add-peak-method-to-queues.patch

OBS-URL: https://build.opensuse.org/request/show/958587
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/python-Scrapy?expand=0&rev=13
Committed by Git OBS Bridge on 2022-03-03 23:17:11 +00:00
6 changed files with 23 additions and 602 deletions
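
Two of the changelog items above are settings-level features. The following is a
minimal, illustrative settings.py sketch (not part of this commit), assuming the
feed options documented for Scrapy >= 2.6 and the asyncio reactor setting;
PricedItemFilter is a hypothetical example class:

    # Illustrative sketch, assuming Scrapy >= 2.6 feed options and dict-like items.
    from pathlib import Path

    class PricedItemFilter:
        """Per-feed item filter: keep only items that carry a price."""
        def __init__(self, feed_options):
            self.feed_options = feed_options

        def accepts(self, item):
            # Called per scraped item; return False to drop it from this feed.
            return bool(item.get("price"))

    # asyncio support (no longer experimental) is enabled via the asyncio reactor.
    TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"

    FEEDS = {
        Path("output/items.json.gz"): {       # pathlib.Path output path (new in 2.6)
            "format": "json",
            "item_filter": PricedItemFilter,  # per-feed item filtering (new in 2.6)
            "postprocessing": [               # per-feed post-processing (new in 2.6)
                "scrapy.extensions.postprocessing.GzipPlugin",
            ],
        },
    }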

Scrapy-2.5.1.tar.gz (deleted)

@@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:13af6032476ab4256158220e530411290b3b934dd602bb6dacacbf6d16141f49
size 1072669

Scrapy-2.6.1.tar.gz (new file)

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:56fd55a59d0f329ce752892358abee5a6b50b4fc55a40420ea317dc617553827
size 1103155

add-peak-method-to-queues.patch (deleted)

@@ -1,581 +0,0 @@
--- a/scrapy/pqueues.py
+++ b/scrapy/pqueues.py
@@ -3,6 +3,7 @@ import logging
 from scrapy.utils.misc import create_instance
+
 logger = logging.getLogger(__name__)
@@ -17,8 +18,7 @@ def _path_safe(text):
     >>> _path_safe('some@symbol?').startswith('some_symbol_')
     True
     """
-    pathable_slot = "".join([c if c.isalnum() or c in '-._' else '_'
-                             for c in text])
+    pathable_slot = "".join([c if c.isalnum() or c in '-._' else '_' for c in text])
     # as we replace some letters we can get collision for different slots
     # and we add a unique part
     unique_slot = hashlib.md5(text.encode('utf8')).hexdigest()
@@ -35,6 +35,9 @@ class ScrapyPriorityQueue:
     * close()
     * __len__()
+    Optionally, the queue could provide a ``peek`` method that should return the
+    next object to be returned by ``pop``, but without removing it from the queue.
+
     ``__init__`` method of ScrapyPriorityQueue receives a downstream_queue_cls
     argument, which is a class used to instantiate a new (internal) queue when
     a new priority is allocated.
@@ -70,10 +73,12 @@ class ScrapyPriorityQueue:
         self.curprio = min(startprios)
     def qfactory(self, key):
-        return create_instance(self.downstream_queue_cls,
-                               None,
-                               self.crawler,
-                               self.key + '/' + str(key))
+        return create_instance(
+            self.downstream_queue_cls,
+            None,
+            self.crawler,
+            self.key + '/' + str(key),
+        )
     def priority(self, request):
         return -request.priority
@@ -99,6 +104,18 @@ class ScrapyPriorityQueue:
         self.curprio = min(prios) if prios else None
         return m
+    def peek(self):
+        """Returns the next object to be returned by :meth:`pop`,
+        but without removing it from the queue.
+
+        Raises :exc:`NotImplementedError` if the underlying queue class does
+        not implement a ``peek`` method, which is optional for queues.
+        """
+        if self.curprio is None:
+            return None
+        queue = self.queues[self.curprio]
+        return queue.peek()
+
     def close(self):
         active = []
         for p, q in self.queues.items():
@@ -116,8 +133,7 @@ class DownloaderInterface:
         self.downloader = crawler.engine.downloader
     def stats(self, possible_slots):
-        return [(self._active_downloads(slot), slot)
-                for slot in possible_slots]
+        return [(self._active_downloads(slot), slot) for slot in possible_slots]
     def get_slot_key(self, request):
         return self.downloader._get_slot_key(request, None)
@@ -162,10 +178,12 @@ class DownloaderAwarePriorityQueue:
         self.pqueues[slot] = self.pqfactory(slot, startprios)
     def pqfactory(self, slot, startprios=()):
-        return ScrapyPriorityQueue(self.crawler,
-                                   self.downstream_queue_cls,
-                                   self.key + '/' + _path_safe(slot),
-                                   startprios)
+        return ScrapyPriorityQueue(
+            self.crawler,
+            self.downstream_queue_cls,
+            self.key + '/' + _path_safe(slot),
+            startprios,
+        )
     def pop(self):
         stats = self._downloader_interface.stats(self.pqueues)
@@ -187,9 +205,22 @@ class DownloaderAwarePriorityQueue:
         queue = self.pqueues[slot]
         queue.push(request)
+    def peek(self):
+        """Returns the next object to be returned by :meth:`pop`,
+        but without removing it from the queue.
+
+        Raises :exc:`NotImplementedError` if the underlying queue class does
+        not implement a ``peek`` method, which is optional for queues.
+        """
+        stats = self._downloader_interface.stats(self.pqueues)
+        if not stats:
+            return None
+        slot = min(stats)[1]
+        queue = self.pqueues[slot]
+        return queue.peek()
+
     def close(self):
-        active = {slot: queue.close()
-                  for slot, queue in self.pqueues.items()}
+        active = {slot: queue.close() for slot, queue in self.pqueues.items()}
         self.pqueues.clear()
         return active
--- a/scrapy/squeues.py
+++ b/scrapy/squeues.py
@@ -19,7 +19,6 @@ def _with_mkdir(queue_class):
             dirname = os.path.dirname(path)
             if not os.path.exists(dirname):
                 os.makedirs(dirname, exist_ok=True)
-
             super().__init__(path, *args, **kwargs)
     return DirectoriesCreated
@@ -38,6 +37,20 @@ def _serializable_queue(queue_class, ser
             if s:
                 return deserialize(s)
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError("The underlying queue class does not implement 'peek'") from ex
+            if s:
+                return deserialize(s)
+
     return SerializableQueue
@@ -59,12 +72,21 @@ def _scrapy_serialization_queue(queue_cl
         def pop(self):
             request = super().pop()
-
             if not request:
                 return None
+            return request_from_dict(request, self.spider)
-            request = request_from_dict(request, self.spider)
-            return request
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            request = super().peek()
+            if not request:
+                return None
+            return request_from_dict(request, self.spider)
     return ScrapyRequestQueue
@@ -76,6 +98,19 @@ def _scrapy_non_serialization_queue(queu
         def from_crawler(cls, crawler, *args, **kwargs):
             return cls()
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError("The underlying queue class does not implement 'peek'") from ex
+            return s
+
     return ScrapyRequestQueue
@@ -109,17 +144,9 @@ MarshalLifoDiskQueueNonRequest = _serial
     marshal.loads
 )
-PickleFifoDiskQueue = _scrapy_serialization_queue(
-    PickleFifoDiskQueueNonRequest
-)
-PickleLifoDiskQueue = _scrapy_serialization_queue(
-    PickleLifoDiskQueueNonRequest
-)
-MarshalFifoDiskQueue = _scrapy_serialization_queue(
-    MarshalFifoDiskQueueNonRequest
-)
-MarshalLifoDiskQueue = _scrapy_serialization_queue(
-    MarshalLifoDiskQueueNonRequest
-)
+PickleFifoDiskQueue = _scrapy_serialization_queue(PickleFifoDiskQueueNonRequest)
+PickleLifoDiskQueue = _scrapy_serialization_queue(PickleLifoDiskQueueNonRequest)
+MarshalFifoDiskQueue = _scrapy_serialization_queue(MarshalFifoDiskQueueNonRequest)
+MarshalLifoDiskQueue = _scrapy_serialization_queue(MarshalLifoDiskQueueNonRequest)
 FifoMemoryQueue = _scrapy_non_serialization_queue(queue.FifoMemoryQueue)
 LifoMemoryQueue = _scrapy_non_serialization_queue(queue.LifoMemoryQueue)
--- /dev/null
+++ b/tests/test_pqueues.py
@@ -0,0 +1,144 @@
+import tempfile
+import unittest
+
+import queuelib
+
+from scrapy.http.request import Request
+from scrapy.pqueues import ScrapyPriorityQueue, DownloaderAwarePriorityQueue
+from scrapy.spiders import Spider
+from scrapy.squeues import FifoMemoryQueue
+from scrapy.utils.test import get_crawler
+
+from tests.test_scheduler import MockDownloader, MockEngine
+
+
+class PriorityQueueTest(unittest.TestCase):
+    def setUp(self):
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler._create_spider("foo")
+
+    def test_queue_push_pop_one(self):
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        self.assertIsNone(queue.pop())
+        self.assertEqual(len(queue), 0)
+        req1 = Request("https://example.org/1", priority=1)
+        queue.push(req1)
+        self.assertEqual(len(queue), 1)
+        dequeued = queue.pop()
+        self.assertEqual(len(queue), 0)
+        self.assertEqual(dequeued.url, req1.url)
+        self.assertEqual(dequeued.priority, req1.priority)
+        self.assertEqual(queue.close(), [])
+
+    def test_no_peek_raises(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is defined")
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        queue.push(Request("https://example.org"))
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            queue.peek()
+        queue.close()
+
+    def test_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is undefined")
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        self.assertEqual(len(queue), 0)
+        self.assertIsNone(queue.peek())
+        req1 = Request("https://example.org/1")
+        req2 = Request("https://example.org/2")
+        req3 = Request("https://example.org/3")
+        queue.push(req1)
+        queue.push(req2)
+        queue.push(req3)
+        self.assertEqual(len(queue), 3)
+        self.assertEqual(queue.peek().url, req1.url)
+        self.assertEqual(queue.pop().url, req1.url)
+        self.assertEqual(len(queue), 2)
+        self.assertEqual(queue.peek().url, req2.url)
+        self.assertEqual(queue.pop().url, req2.url)
+        self.assertEqual(len(queue), 1)
+        self.assertEqual(queue.peek().url, req3.url)
+        self.assertEqual(queue.pop().url, req3.url)
+        self.assertEqual(queue.close(), [])
+
+    def test_queue_push_pop_priorities(self):
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir, [-1, -2, -3])
+        self.assertIsNone(queue.pop())
+        self.assertEqual(len(queue), 0)
+        req1 = Request("https://example.org/1", priority=1)
+        req2 = Request("https://example.org/2", priority=2)
+        req3 = Request("https://example.org/3", priority=3)
+        queue.push(req1)
+        queue.push(req2)
+        queue.push(req3)
+        self.assertEqual(len(queue), 3)
+        dequeued = queue.pop()
+        self.assertEqual(len(queue), 2)
+        self.assertEqual(dequeued.url, req3.url)
+        self.assertEqual(dequeued.priority, req3.priority)
+        self.assertEqual(queue.close(), [-1, -2])
+
+
+class DownloaderAwarePriorityQueueTest(unittest.TestCase):
+    def setUp(self):
+        crawler = get_crawler(Spider)
+        crawler.engine = MockEngine(downloader=MockDownloader())
+        self.queue = DownloaderAwarePriorityQueue.from_crawler(
+            crawler=crawler,
+            downstream_queue_cls=FifoMemoryQueue,
+            key="foo/bar",
+        )
+
+    def tearDown(self):
+        self.queue.close()
+
+    def test_push_pop(self):
+        self.assertEqual(len(self.queue), 0)
+        self.assertIsNone(self.queue.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        self.queue.push(req1)
+        self.queue.push(req2)
+        self.queue.push(req3)
+        self.assertEqual(len(self.queue), 3)
+        self.assertEqual(self.queue.pop().url, req1.url)
+        self.assertEqual(len(self.queue), 2)
+        self.assertEqual(self.queue.pop().url, req2.url)
+        self.assertEqual(len(self.queue), 1)
+        self.assertEqual(self.queue.pop().url, req3.url)
+        self.assertEqual(len(self.queue), 0)
+        self.assertIsNone(self.queue.pop())
+
+    def test_no_peek_raises(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is defined")
+        self.queue.push(Request("https://example.org"))
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            self.queue.peek()
+
+    def test_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is undefined")
+        self.assertEqual(len(self.queue), 0)
+        req1 = Request("https://example.org/1")
+        req2 = Request("https://example.org/2")
+        req3 = Request("https://example.org/3")
+        self.queue.push(req1)
+        self.queue.push(req2)
+        self.queue.push(req3)
+        self.assertEqual(len(self.queue), 3)
+        self.assertEqual(self.queue.peek().url, req1.url)
+        self.assertEqual(self.queue.pop().url, req1.url)
+        self.assertEqual(len(self.queue), 2)
+        self.assertEqual(self.queue.peek().url, req2.url)
+        self.assertEqual(self.queue.pop().url, req2.url)
+        self.assertEqual(len(self.queue), 1)
+        self.assertEqual(self.queue.peek().url, req3.url)
+        self.assertEqual(self.queue.pop().url, req3.url)
+        self.assertIsNone(self.queue.peek())
--- /dev/null
+++ b/tests/test_squeues_request.py
@@ -0,0 +1,214 @@
+import shutil
+import tempfile
+import unittest
+
+import queuelib
+
+from scrapy.squeues import (
+    PickleFifoDiskQueue,
+    PickleLifoDiskQueue,
+    MarshalFifoDiskQueue,
+    MarshalLifoDiskQueue,
+    FifoMemoryQueue,
+    LifoMemoryQueue,
+)
+from scrapy.http import Request
+from scrapy.spiders import Spider
+from scrapy.utils.test import get_crawler
+
+
+"""
+Queues that handle requests
+"""
+
+
+class BaseQueueTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp(prefix="scrapy-queue-tests-")
+        self.qpath = self.tempfilename()
+        self.qdir = self.mkdtemp()
+        self.crawler = get_crawler(Spider)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmpdir)
+
+    def tempfilename(self):
+        with tempfile.NamedTemporaryFile(dir=self.tmpdir) as nf:
+            return nf.name
+
+    def mkdtemp(self):
+        return tempfile.mkdtemp(dir=self.tmpdir)
+
+
+class RequestQueueTestMixin:
+    def queue(self):
+        raise NotImplementedError()
+
+    def test_one_element_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req = Request("http://www.example.com")
+        q.push(req)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req.url)
+        self.assertEqual(q.pop().url, req.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_one_element_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req = Request("http://www.example.com")
+        q.push(req)
+        self.assertEqual(len(q), 1)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(q.pop().url, req.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class FifoQueueMixin(RequestQueueTestMixin):
+    def test_fifo_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.peek().url, req1.url)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.peek().url, req2.url)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req3.url)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_fifo_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class LifoQueueMixin(RequestQueueTestMixin):
+    def test_lifo_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.peek().url, req3.url)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.peek().url, req2.url)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req1.url)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_lifo_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class PickleFifoDiskQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return PickleFifoDiskQueue.from_crawler(crawler=self.crawler, key="pickle/fifo")
+
+
+class PickleLifoDiskQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return PickleLifoDiskQueue.from_crawler(crawler=self.crawler, key="pickle/lifo")
+
+
+class MarshalFifoDiskQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return MarshalFifoDiskQueue.from_crawler(crawler=self.crawler, key="marshal/fifo")
+
+
+class MarshalLifoDiskQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return MarshalLifoDiskQueue.from_crawler(crawler=self.crawler, key="marshal/lifo")
+
+
+class FifoMemoryQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return FifoMemoryQueue.from_crawler(crawler=self.crawler)
+
+
+class LifoMemoryQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return LifoMemoryQueue.from_crawler(crawler=self.crawler)
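
For reference, the peek() semantics this (now merged upstream) patch adds can be
exercised directly. A minimal sketch, mirroring the tests above and assuming
Scrapy >= 2.6 with a queuelib whose memory queues define peek():

    # Minimal sketch; the classes and helpers are the same ones the new tests import.
    from scrapy.http import Request
    from scrapy.pqueues import ScrapyPriorityQueue
    from scrapy.spiders import Spider
    from scrapy.squeues import FifoMemoryQueue
    from scrapy.utils.test import get_crawler

    crawler = get_crawler(Spider)
    queue = ScrapyPriorityQueue.from_crawler(crawler, FifoMemoryQueue, "scheduler/0")
    queue.push(Request("https://example.org/1"))

    try:
        head = queue.peek()  # the request pop() would return, still queued
        assert head.url == "https://example.org/1"
        assert queue.pop().url == head.url
    except NotImplementedError:
        # Raised when the installed queuelib queues do not implement 'peek'.
        pass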

python-Scrapy.changes

@@ -1,3 +1,18 @@
+-------------------------------------------------------------------
+Wed Mar  2 22:17:28 UTC 2022 - Matej Cepl <mcepl@suse.com>
+
+- Update to v2.6.1
+  * Security fixes for cookie handling (CVE-2022-0577 aka
+    bsc#1196638, GHSA-mfjm-vh54-3f96)
+  * Python 3.10 support
+  * asyncio support is no longer considered experimental, and works
+    out-of-the-box on Windows regardless of your Python version
+  * Feed exports now support pathlib.Path output paths and per-feed
+    item filtering and post-processing
+- Remove unnecessary patches:
+  - remove-h2-version-restriction.patch
+  - add-peak-method-to-queues.patch
+
 -------------------------------------------------------------------
 Sun Jan 16 21:07:14 UTC 2022 - Ben Greiner <code@bnavigator.de>

python-Scrapy.spec

@@ -1,7 +1,7 @@
# #
# spec file for package python-Scrapy # spec file for package python-Scrapy
# #
# Copyright (c) 2021 SUSE LLC # Copyright (c) 2022 SUSE LLC
# #
# All modifications and additions to the file contributed by third parties # All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed # remain the property of their copyright owners, unless otherwise agreed
@@ -19,17 +19,13 @@
%{?!python_module:%define python_module() python3-%{**}} %{?!python_module:%define python_module() python3-%{**}}
%define skip_python2 1 %define skip_python2 1
Name: python-Scrapy Name: python-Scrapy
Version: 2.5.1 Version: 2.6.1
Release: 0 Release: 0
Summary: A high-level Python Screen Scraping framework Summary: A high-level Python Screen Scraping framework
License: BSD-3-Clause License: BSD-3-Clause
Group: Development/Languages/Python Group: Development/Languages/Python
URL: https://scrapy.org URL: https://scrapy.org
Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz
# PATCH-FIX-OPENSUSE remove-h2-version-restriction.patch boo#1190035 -- run scrapy with h2 >= 4.0.0
Patch0: remove-h2-version-restriction.patch
# PATCH-FIX-UPSTREAM add-peak-method-to-queues.patch https://github.com/scrapy/scrapy/commit/68379197986ae3deb81a545b5fd6920ea3347094
Patch1: add-peak-method-to-queues.patch
BuildRequires: %{python_module Pillow} BuildRequires: %{python_module Pillow}
BuildRequires: %{python_module Protego >= 0.1.15} BuildRequires: %{python_module Protego >= 0.1.15}
BuildRequires: %{python_module PyDispatcher >= 2.0.5} BuildRequires: %{python_module PyDispatcher >= 2.0.5}
@@ -52,8 +48,9 @@ BuildRequires: %{python_module service_identity >= 16.0.0}
BuildRequires: %{python_module setuptools} BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module sybil} BuildRequires: %{python_module sybil}
BuildRequires: %{python_module testfixtures >= 6.0.0} BuildRequires: %{python_module testfixtures >= 6.0.0}
BuildRequires: %{python_module tldextract}
BuildRequires: %{python_module uvloop} BuildRequires: %{python_module uvloop}
BuildRequires: %{python_module w3lib >= 1.17.2} BuildRequires: %{python_module w3lib >= 1.17.0}
BuildRequires: %{python_module zope.interface >= 4.1.3} BuildRequires: %{python_module zope.interface >= 4.1.3}
BuildRequires: fdupes BuildRequires: fdupes
BuildRequires: python-rpm-macros BuildRequires: python-rpm-macros
@@ -94,6 +91,7 @@ Provides documentation for %{name}.
%prep %prep
%setup -n Scrapy-%{version} %setup -n Scrapy-%{version}
%autopatch -p1 %autopatch -p1
sed -i -e 's:= python:= python3:g' docs/Makefile sed -i -e 's:= python:= python3:g' docs/Makefile
%build %build

remove-h2-version-restriction.patch (deleted)

@@ -1,11 +0,0 @@
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ install_requires = [
     'zope.interface>=4.1.3',
     'protego>=0.1.15',
     'itemadapter>=0.1.0',
-    'h2>=3.0,<4.0',
+    'h2>=3.0',
 ]
 extras_require = {}
 cpython_dependencies = [