forked from pool/python-Scrapy
Accepting request 917688 from home:fusionfuture:branches:devel:languages:python
- Remove h2 < 4.0 dependency version restriction. (boo#1190035)
* remove-h2-version-restriction.patch
- Add peek method to queues to fix build with queuelib 1.6.2.
* add-peak-method-to-queues.patch
- Drop support for Python 3.6 as python-uvloop does not support it.
- Require testfixtures >= 6.0.0 (tests need LogCapture.check_present).
  (https://github.com/Simplistix/testfixtures/commit/2953bb4caadc1a462e5332ffb01591ba1fc3284f)
OBS-URL: https://build.opensuse.org/request/show/917688
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-Scrapy?expand=0&rev=20
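The gist of the queue change, as a hedged illustration (not part of the commit): recent queuelib releases (the changelog cites 1.6.2) define a peek method on their queues, and the patched Scrapy wrappers expose the same contract — peek returns what pop would return next, without consuming it. Assuming such a queuelib is installed:

    from queuelib.queue import FifoMemoryQueue

    q = FifoMemoryQueue()
    q.push(b"first")
    q.push(b"second")
    assert q.peek() == b"first"   # non-destructive look at the head
    assert q.pop() == b"first"    # pop still returns the same object
    assert q.peek() == b"second"
    q.close()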
add-peak-method-to-queues.patch (new file, 581 lines)
@@ -0,0 +1,581 @@
--- a/scrapy/pqueues.py
+++ b/scrapy/pqueues.py
@@ -3,6 +3,7 @@ import logging
 
 from scrapy.utils.misc import create_instance
 
+
 logger = logging.getLogger(__name__)
 
 
@@ -17,8 +18,7 @@ def _path_safe(text):
     >>> _path_safe('some@symbol?').startswith('some_symbol_')
     True
     """
-    pathable_slot = "".join([c if c.isalnum() or c in '-._' else '_'
-                             for c in text])
+    pathable_slot = "".join([c if c.isalnum() or c in '-._' else '_' for c in text])
     # as we replace some letters we can get collision for different slots
     # add we add unique part
     unique_slot = hashlib.md5(text.encode('utf8')).hexdigest()
@@ -35,6 +35,9 @@ class ScrapyPriorityQueue:
     * close()
     * __len__()
 
+    Optionally, the queue could provide a ``peek`` method, that should return the
+    next object to be returned by ``pop``, but without removing it from the queue.
+
     ``__init__`` method of ScrapyPriorityQueue receives a downstream_queue_cls
     argument, which is a class used to instantiate a new (internal) queue when
     a new priority is allocated.
@@ -70,10 +73,12 @@ class ScrapyPriorityQueue:
             self.curprio = min(startprios)
 
     def qfactory(self, key):
-        return create_instance(self.downstream_queue_cls,
-                               None,
-                               self.crawler,
-                               self.key + '/' + str(key))
+        return create_instance(
+            self.downstream_queue_cls,
+            None,
+            self.crawler,
+            self.key + '/' + str(key),
+        )
 
     def priority(self, request):
         return -request.priority
@@ -99,6 +104,18 @@ class ScrapyPriorityQueue:
         self.curprio = min(prios) if prios else None
         return m
 
+    def peek(self):
+        """Returns the next object to be returned by :meth:`pop`,
+        but without removing it from the queue.
+
+        Raises :exc:`NotImplementedError` if the underlying queue class does
+        not implement a ``peek`` method, which is optional for queues.
+        """
+        if self.curprio is None:
+            return None
+        queue = self.queues[self.curprio]
+        return queue.peek()
+
     def close(self):
         active = []
         for p, q in self.queues.items():
@@ -116,8 +133,7 @@ class DownloaderInterface:
         self.downloader = crawler.engine.downloader
 
     def stats(self, possible_slots):
-        return [(self._active_downloads(slot), slot)
-                for slot in possible_slots]
+        return [(self._active_downloads(slot), slot) for slot in possible_slots]
 
     def get_slot_key(self, request):
         return self.downloader._get_slot_key(request, None)
@@ -162,10 +178,12 @@ class DownloaderAwarePriorityQueue:
             self.pqueues[slot] = self.pqfactory(slot, startprios)
 
     def pqfactory(self, slot, startprios=()):
-        return ScrapyPriorityQueue(self.crawler,
-                                   self.downstream_queue_cls,
-                                   self.key + '/' + _path_safe(slot),
-                                   startprios)
+        return ScrapyPriorityQueue(
+            self.crawler,
+            self.downstream_queue_cls,
+            self.key + '/' + _path_safe(slot),
+            startprios,
+        )
 
     def pop(self):
         stats = self._downloader_interface.stats(self.pqueues)
@@ -187,9 +205,22 @@ class DownloaderAwarePriorityQueue:
         queue = self.pqueues[slot]
         queue.push(request)
 
+    def peek(self):
+        """Returns the next object to be returned by :meth:`pop`,
+        but without removing it from the queue.
+
+        Raises :exc:`NotImplementedError` if the underlying queue class does
+        not implement a ``peek`` method, which is optional for queues.
+        """
+        stats = self._downloader_interface.stats(self.pqueues)
+        if not stats:
+            return None
+        slot = min(stats)[1]
+        queue = self.pqueues[slot]
+        return queue.peek()
+
     def close(self):
-        active = {slot: queue.close()
-                  for slot, queue in self.pqueues.items()}
+        active = {slot: queue.close() for slot, queue in self.pqueues.items()}
         self.pqueues.clear()
         return active
 
--- a/scrapy/squeues.py
+++ b/scrapy/squeues.py
@@ -19,7 +19,6 @@ def _with_mkdir(queue_class):
             dirname = os.path.dirname(path)
             if not os.path.exists(dirname):
                 os.makedirs(dirname, exist_ok=True)
-
             super().__init__(path, *args, **kwargs)
 
     return DirectoriesCreated
@@ -38,6 +37,20 @@ def _serializable_queue(queue_class, ser
             if s:
                 return deserialize(s)
 
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError("The underlying queue class does not implement 'peek'") from ex
+            if s:
+                return deserialize(s)
+
         return SerializableQueue
 
 
@@ -59,12 +72,21 @@ def _scrapy_serialization_queue(queue_cl
 
         def pop(self):
             request = super().pop()
-
             if not request:
                 return None
+            return request_from_dict(request, self.spider)
 
-            request = request_from_dict(request, self.spider)
-            return request
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            request = super().peek()
+            if not request:
+                return None
+            return request_from_dict(request, self.spider)
 
         return ScrapyRequestQueue
 
@@ -76,6 +98,19 @@ def _scrapy_non_serialization_queue(queu
         def from_crawler(cls, crawler, *args, **kwargs):
             return cls()
 
+        def peek(self):
+            """Returns the next object to be returned by :meth:`pop`,
+            but without removing it from the queue.
+
+            Raises :exc:`NotImplementedError` if the underlying queue class does
+            not implement a ``peek`` method, which is optional for queues.
+            """
+            try:
+                s = super().peek()
+            except AttributeError as ex:
+                raise NotImplementedError("The underlying queue class does not implement 'peek'") from ex
+            return s
+
         return ScrapyRequestQueue
 
 
@@ -109,17 +144,9 @@ MarshalLifoDiskQueueNonRequest = _serial
     marshal.loads
 )
 
-PickleFifoDiskQueue = _scrapy_serialization_queue(
-    PickleFifoDiskQueueNonRequest
-)
-PickleLifoDiskQueue = _scrapy_serialization_queue(
-    PickleLifoDiskQueueNonRequest
-)
-MarshalFifoDiskQueue = _scrapy_serialization_queue(
-    MarshalFifoDiskQueueNonRequest
-)
-MarshalLifoDiskQueue = _scrapy_serialization_queue(
-    MarshalLifoDiskQueueNonRequest
-)
+PickleFifoDiskQueue = _scrapy_serialization_queue(PickleFifoDiskQueueNonRequest)
+PickleLifoDiskQueue = _scrapy_serialization_queue(PickleLifoDiskQueueNonRequest)
+MarshalFifoDiskQueue = _scrapy_serialization_queue(MarshalFifoDiskQueueNonRequest)
+MarshalLifoDiskQueue = _scrapy_serialization_queue(MarshalLifoDiskQueueNonRequest)
 FifoMemoryQueue = _scrapy_non_serialization_queue(queue.FifoMemoryQueue)
 LifoMemoryQueue = _scrapy_non_serialization_queue(queue.LifoMemoryQueue)
--- /dev/null
+++ b/tests/test_pqueues.py
@@ -0,0 +1,144 @@
+import tempfile
+import unittest
+
+import queuelib
+
+from scrapy.http.request import Request
+from scrapy.pqueues import ScrapyPriorityQueue, DownloaderAwarePriorityQueue
+from scrapy.spiders import Spider
+from scrapy.squeues import FifoMemoryQueue
+from scrapy.utils.test import get_crawler
+
+from tests.test_scheduler import MockDownloader, MockEngine
+
+
+class PriorityQueueTest(unittest.TestCase):
+    def setUp(self):
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler._create_spider("foo")
+
+    def test_queue_push_pop_one(self):
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        self.assertIsNone(queue.pop())
+        self.assertEqual(len(queue), 0)
+        req1 = Request("https://example.org/1", priority=1)
+        queue.push(req1)
+        self.assertEqual(len(queue), 1)
+        dequeued = queue.pop()
+        self.assertEqual(len(queue), 0)
+        self.assertEqual(dequeued.url, req1.url)
+        self.assertEqual(dequeued.priority, req1.priority)
+        self.assertEqual(queue.close(), [])
+
+    def test_no_peek_raises(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is defined")
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        queue.push(Request("https://example.org"))
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            queue.peek()
+        queue.close()
+
+    def test_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is undefined")
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir)
+        self.assertEqual(len(queue), 0)
+        self.assertIsNone(queue.peek())
+        req1 = Request("https://example.org/1")
+        req2 = Request("https://example.org/2")
+        req3 = Request("https://example.org/3")
+        queue.push(req1)
+        queue.push(req2)
+        queue.push(req3)
+        self.assertEqual(len(queue), 3)
+        self.assertEqual(queue.peek().url, req1.url)
+        self.assertEqual(queue.pop().url, req1.url)
+        self.assertEqual(len(queue), 2)
+        self.assertEqual(queue.peek().url, req2.url)
+        self.assertEqual(queue.pop().url, req2.url)
+        self.assertEqual(len(queue), 1)
+        self.assertEqual(queue.peek().url, req3.url)
+        self.assertEqual(queue.pop().url, req3.url)
+        self.assertEqual(queue.close(), [])
+
+    def test_queue_push_pop_priorities(self):
+        temp_dir = tempfile.mkdtemp()
+        queue = ScrapyPriorityQueue.from_crawler(self.crawler, FifoMemoryQueue, temp_dir, [-1, -2, -3])
+        self.assertIsNone(queue.pop())
+        self.assertEqual(len(queue), 0)
+        req1 = Request("https://example.org/1", priority=1)
+        req2 = Request("https://example.org/2", priority=2)
+        req3 = Request("https://example.org/3", priority=3)
+        queue.push(req1)
+        queue.push(req2)
+        queue.push(req3)
+        self.assertEqual(len(queue), 3)
+        dequeued = queue.pop()
+        self.assertEqual(len(queue), 2)
+        self.assertEqual(dequeued.url, req3.url)
+        self.assertEqual(dequeued.priority, req3.priority)
+        self.assertEqual(queue.close(), [-1, -2])
+
+
+class DownloaderAwarePriorityQueueTest(unittest.TestCase):
+    def setUp(self):
+        crawler = get_crawler(Spider)
+        crawler.engine = MockEngine(downloader=MockDownloader())
+        self.queue = DownloaderAwarePriorityQueue.from_crawler(
+            crawler=crawler,
+            downstream_queue_cls=FifoMemoryQueue,
+            key="foo/bar",
+        )
+
+    def tearDown(self):
+        self.queue.close()
+
+    def test_push_pop(self):
+        self.assertEqual(len(self.queue), 0)
+        self.assertIsNone(self.queue.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        self.queue.push(req1)
+        self.queue.push(req2)
+        self.queue.push(req3)
+        self.assertEqual(len(self.queue), 3)
+        self.assertEqual(self.queue.pop().url, req1.url)
+        self.assertEqual(len(self.queue), 2)
+        self.assertEqual(self.queue.pop().url, req2.url)
+        self.assertEqual(len(self.queue), 1)
+        self.assertEqual(self.queue.pop().url, req3.url)
+        self.assertEqual(len(self.queue), 0)
+        self.assertIsNone(self.queue.pop())
+
+    def test_no_peek_raises(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is defined")
+        self.queue.push(Request("https://example.org"))
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            self.queue.peek()
+
+    def test_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("queuelib.queue.FifoMemoryQueue.peek is undefined")
+        self.assertEqual(len(self.queue), 0)
+        req1 = Request("https://example.org/1")
+        req2 = Request("https://example.org/2")
+        req3 = Request("https://example.org/3")
+        self.queue.push(req1)
+        self.queue.push(req2)
+        self.queue.push(req3)
+        self.assertEqual(len(self.queue), 3)
+        self.assertEqual(self.queue.peek().url, req1.url)
+        self.assertEqual(self.queue.pop().url, req1.url)
+        self.assertEqual(len(self.queue), 2)
+        self.assertEqual(self.queue.peek().url, req2.url)
+        self.assertEqual(self.queue.pop().url, req2.url)
+        self.assertEqual(len(self.queue), 1)
+        self.assertEqual(self.queue.peek().url, req3.url)
+        self.assertEqual(self.queue.pop().url, req3.url)
+        self.assertIsNone(self.queue.peek())
--- /dev/null
+++ b/tests/test_squeues_request.py
@@ -0,0 +1,214 @@
+import shutil
+import tempfile
+import unittest
+
+import queuelib
+
+from scrapy.squeues import (
+    PickleFifoDiskQueue,
+    PickleLifoDiskQueue,
+    MarshalFifoDiskQueue,
+    MarshalLifoDiskQueue,
+    FifoMemoryQueue,
+    LifoMemoryQueue,
+)
+from scrapy.http import Request
+from scrapy.spiders import Spider
+from scrapy.utils.test import get_crawler
+
+
+"""
+Queues that handle requests
+"""
+
+
+class BaseQueueTestCase(unittest.TestCase):
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp(prefix="scrapy-queue-tests-")
+        self.qpath = self.tempfilename()
+        self.qdir = self.mkdtemp()
+        self.crawler = get_crawler(Spider)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmpdir)
+
+    def tempfilename(self):
+        with tempfile.NamedTemporaryFile(dir=self.tmpdir) as nf:
+            return nf.name
+
+    def mkdtemp(self):
+        return tempfile.mkdtemp(dir=self.tmpdir)
+
+
+class RequestQueueTestMixin:
+    def queue(self):
+        raise NotImplementedError()
+
+    def test_one_element_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req = Request("http://www.example.com")
+        q.push(req)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req.url)
+        self.assertEqual(q.pop().url, req.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_one_element_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req = Request("http://www.example.com")
+        q.push(req)
+        self.assertEqual(len(q), 1)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(q.pop().url, req.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class FifoQueueMixin(RequestQueueTestMixin):
+    def test_fifo_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.peek().url, req1.url)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.peek().url, req2.url)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req3.url)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_fifo_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class LifoQueueMixin(RequestQueueTestMixin):
+    def test_lifo_with_peek(self):
+        if not hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.peek().url, req3.url)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.peek().url, req2.url)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.peek().url, req1.url)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.peek())
+        self.assertIsNone(q.pop())
+        q.close()
+
+    def test_lifo_without_peek(self):
+        if hasattr(queuelib.queue.FifoMemoryQueue, "peek"):
+            raise unittest.SkipTest("The queuelib queues do not define peek")
+        q = self.queue()
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        req1 = Request("http://www.example.com/1")
+        req2 = Request("http://www.example.com/2")
+        req3 = Request("http://www.example.com/3")
+        q.push(req1)
+        q.push(req2)
+        q.push(req3)
+        with self.assertRaises(NotImplementedError, msg="The underlying queue class does not implement 'peek'"):
+            q.peek()
+        self.assertEqual(len(q), 3)
+        self.assertEqual(q.pop().url, req3.url)
+        self.assertEqual(len(q), 2)
+        self.assertEqual(q.pop().url, req2.url)
+        self.assertEqual(len(q), 1)
+        self.assertEqual(q.pop().url, req1.url)
+        self.assertEqual(len(q), 0)
+        self.assertIsNone(q.pop())
+        q.close()
+
+
+class PickleFifoDiskQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return PickleFifoDiskQueue.from_crawler(crawler=self.crawler, key="pickle/fifo")
+
+
+class PickleLifoDiskQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return PickleLifoDiskQueue.from_crawler(crawler=self.crawler, key="pickle/lifo")
+
+
+class MarshalFifoDiskQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return MarshalFifoDiskQueue.from_crawler(crawler=self.crawler, key="marshal/fifo")
+
+
+class MarshalLifoDiskQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return MarshalLifoDiskQueue.from_crawler(crawler=self.crawler, key="marshal/lifo")
+
+
+class FifoMemoryQueueRequestTest(FifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return FifoMemoryQueue.from_crawler(crawler=self.crawler)
+
+
+class LifoMemoryQueueRequestTest(LifoQueueMixin, BaseQueueTestCase):
+    def queue(self):
+        return LifoMemoryQueue.from_crawler(crawler=self.crawler)
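A note on the error-handling pattern in the patch above, as a minimal standalone sketch (not from the commit): in scrapy/squeues.py, calling super().peek() against a queuelib class that predates peek raises AttributeError, which the wrappers convert into the NotImplementedError their docstrings promise. The pattern in isolation:

    class _Base:  # stands in for an old queuelib queue without peek
        pass

    class _Wrapper(_Base):
        def peek(self):
            try:
                s = super().peek()
            except AttributeError as ex:
                raise NotImplementedError(
                    "The underlying queue class does not implement 'peek'"
                ) from ex
            return s

This keeps the wrappers importable against both old and new queuelib; the tests above skip or assert accordingly via hasattr checks.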
python-Scrapy.changes
@@ -1,3 +1,14 @@
+-------------------------------------------------------------------
+Wed Sep 1 04:25:44 UTC 2021 - Fusion Future <qydwhotmail@gmail.com>
+
+- Remove h2 < 4.0 dependency version restriction. (boo#1190035)
+  * remove-h2-version-restriction.patch
+- Add peek method to queues to fix build with queuelib 1.6.2.
+  * add-peak-method-to-queues.patch
+- Drop support for Python 3.6 as python-uvloop does not support it.
+- Require testfixtures >= 6.0.0 (tests need LogCapture.check_present).
+  (https://github.com/Simplistix/testfixtures/commit/2953bb4caadc1a462e5332ffb01591ba1fc3284f)
+
 -------------------------------------------------------------------
 Wed Apr 28 09:29:08 UTC 2021 - Ben Greiner <code@bnavigator.de>
 
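For context on the testfixtures bump, a hedged sketch of the API the tests rely on (LogCapture.check_present was added in testfixtures 6.0.0; the log line here is made up):

    import logging
    from testfixtures import LogCapture

    with LogCapture() as capture:
        logging.getLogger("scrapy").warning("slot overridden")
        # check_present asserts these records occur, unlike check(),
        # which requires an exact match of all captured records
        capture.check_present(("scrapy", "WARNING", "slot overridden"))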
python-Scrapy.spec
@@ -18,6 +18,8 @@
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 %define skip_python2 1
+# python-uvloop does not support python3.6
+%define skip_python36 1
 Name: python-Scrapy
 Version: 2.5.0
 Release: 0
@@ -26,6 +28,10 @@ License: BSD-3-Clause
 Group: Development/Languages/Python
 URL: https://scrapy.org
 Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz
+# PATCH-FIX-OPENSUSE remove-h2-version-restriction.patch boo#1190035 -- run scrapy with h2 >= 4.0.0
+Patch0: remove-h2-version-restriction.patch
+# PATCH-FIX-UPSTREAM add-peak-method-to-queues.patch https://github.com/scrapy/scrapy/commit/68379197986ae3deb81a545b5fd6920ea3347094
+Patch1: add-peak-method-to-queues.patch
 BuildRequires: %{python_module Pillow}
 BuildRequires: %{python_module Protego >= 0.1.15}
 BuildRequires: %{python_module PyDispatcher >= 2.0.5}
@@ -47,7 +53,7 @@ BuildRequires: %{python_module queuelib >= 1.4.2}
 BuildRequires: %{python_module service_identity >= 16.0.0}
 BuildRequires: %{python_module setuptools}
 BuildRequires: %{python_module sybil}
-BuildRequires: %{python_module testfixtures}
+BuildRequires: %{python_module testfixtures >= 6.0.0}
 BuildRequires: %{python_module uvloop}
 BuildRequires: %{python_module w3lib >= 1.17.2}
 BuildRequires: %{python_module zope.interface >= 4.1.3}
@@ -88,7 +94,8 @@ Group: Documentation/HTML
 Provides documentation for %{name}.
 
 %prep
-%setup -q -n Scrapy-%{version}
+%setup -n Scrapy-%{version}
+%autopatch -p1
 sed -i -e 's:= python:= python3:g' docs/Makefile
 
 %build
remove-h2-version-restriction.patch (new file, 11 lines)
@@ -0,0 +1,11 @@
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ install_requires = [
     'zope.interface>=4.1.3',
     'protego>=0.1.15',
     'itemadapter>=0.1.0',
-    'h2>=3.0,<4.0',
+    'h2>=3.0',
 ]
 extras_require = {}
 cpython_dependencies = [
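A quick hedged check (not part of the patch) that the relaxed specifier actually admits h2 4.x, which is what boo#1190035 needs; this uses the packaging library:

    from packaging.specifiers import SpecifierSet

    assert "4.0.0" in SpecifierSet(">=3.0")           # new bound accepts h2 4.x
    assert "4.0.0" not in SpecifierSet(">=3.0,<4.0")  # old bound rejected it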