forked from pool/python-parsel
- added patches
fix ddb3708b9a
+ python-parsel-drop-python-2.patch
OBS-URL: https://build.opensuse.org/request/show/1026493
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-parsel?expand=0&rev=19
1519 lines
63 KiB
Diff
1519 lines
63 KiB
Diff
diff --git a/README.rst b/README.rst
|
|
index c1674f1..7bd8204 100644
|
|
--- a/README.rst
|
|
+++ b/README.rst
|
|
@@ -26,7 +26,7 @@ Example (`open online demo`_):
|
|
.. code-block:: python
|
|
|
|
>>> from parsel import Selector
|
|
- >>> selector = Selector(text=u"""<html>
|
|
+ >>> selector = Selector(text="""<html>
|
|
<body>
|
|
<h1>Hello, Parsel!</h1>
|
|
<ul>
|
|
diff --git a/docs/conf.py b/docs/conf.py
|
|
index 27eef0e..f3736de 100755
|
|
--- a/docs/conf.py
|
|
+++ b/docs/conf.py
|
|
@@ -1,5 +1,4 @@
|
|
#!/usr/bin/env python
|
|
-# -*- coding: utf-8 -*-
|
|
|
|
import os
|
|
import sys
|
|
@@ -38,8 +37,8 @@
|
|
master_doc = 'index'
|
|
|
|
# General information about the project.
|
|
-project = u'Parsel'
|
|
-copyright = u'2015, Scrapy Project'
|
|
+project = 'Parsel'
|
|
+copyright = '2015, Scrapy Project'
|
|
|
|
# The version info for the project you're documenting, acts as replacement
|
|
# for |version| and |release|, also used in various other places throughout
|
|
@@ -83,8 +82,8 @@
|
|
# [howto/manual]).
|
|
latex_documents = [
|
|
('index', 'parsel.tex',
|
|
- u'Parsel Documentation',
|
|
- u'Scrapy Project', 'manual'),
|
|
+ 'Parsel Documentation',
|
|
+ 'Scrapy Project', 'manual'),
|
|
]
|
|
|
|
|
|
@@ -94,8 +93,8 @@
|
|
# (source start file, name, description, authors, manual section).
|
|
man_pages = [
|
|
('index', 'parsel',
|
|
- u'Parsel Documentation',
|
|
- [u'Scrapy Project'], 1)
|
|
+ 'Parsel Documentation',
|
|
+ ['Scrapy Project'], 1)
|
|
]
|
|
|
|
|
|
@@ -106,8 +105,8 @@
|
|
# dir menu entry, description, category)
|
|
texinfo_documents = [
|
|
('index', 'parsel',
|
|
- u'Parsel Documentation',
|
|
- u'Scrapy Project',
|
|
+ 'Parsel Documentation',
|
|
+ 'Scrapy Project',
|
|
'parsel',
|
|
'One line description of project.',
|
|
'Miscellaneous'),
|
|
diff --git a/docs/usage.rst b/docs/usage.rst
|
|
index f5950a8..55e6a31 100644
|
|
--- a/docs/usage.rst
|
|
+++ b/docs/usage.rst
|
|
@@ -8,11 +8,9 @@ Create a :class:`~parsel.selector.Selector` object for the HTML or XML text
|
|
that you want to parse::
|
|
|
|
>>> from parsel import Selector
|
|
- >>> text = u"<html><body><h1>Hello, Parsel!</h1></body></html>"
|
|
+ >>> text = "<html><body><h1>Hello, Parsel!</h1></body></html>"
|
|
>>> selector = Selector(text=text)
|
|
|
|
-.. note:: In Python 2, the ``text`` argument must be a ``unicode`` string.
|
|
-
|
|
Then use `CSS`_ or `XPath`_ expressions to select elements::
|
|
|
|
>>> selector.css('h1')
|
|
@@ -412,7 +410,7 @@ classes.
|
|
Example removing an ad from a blog post:
|
|
|
|
>>> from parsel import Selector
|
|
- >>> doc = u"""
|
|
+ >>> doc = """
|
|
... <article>
|
|
... <div class="row">Content paragraph...</div>
|
|
... <div class="row">
|
|
@@ -455,7 +453,7 @@ The ``test()`` function, for example, can prove quite useful when XPath's
|
|
Example selecting links in list item with a "class" attribute ending with a digit::
|
|
|
|
>>> from parsel import Selector
|
|
- >>> doc = u"""
|
|
+ >>> doc = """
|
|
... <div>
|
|
... <ul>
|
|
... <li class="item-0"><a href="link1.html">first item</a></li>
|
|
@@ -487,7 +485,7 @@ extracting text elements for example.
|
|
Example extracting microdata (sample content taken from http://schema.org/Product)
|
|
with groups of itemscopes and corresponding itemprops::
|
|
|
|
- >>> doc = u"""
|
|
+ >>> doc = """
|
|
... <div itemscope itemtype="http://schema.org/Product">
|
|
... <span itemprop="name">Kenmore White 17" Microwave</span>
|
|
... <img src="kenmore-microwave-17in.jpg" alt='Kenmore 17" Microwave' />
|
|
@@ -591,7 +589,7 @@ returns ``True`` for nodes that have all of the specified HTML classes::
|
|
... <p>Fourth</p>
|
|
... """)
|
|
...
|
|
- >>> sel = Selector(u"""
|
|
+ >>> sel = Selector("""
|
|
... <p class="foo bar-baz">First</p>
|
|
... <p class="foo">Second</p>
|
|
... <p class="bar">Third</p>
|
|
@@ -1111,7 +1109,7 @@ Named variables can be useful when strings need to be escaped for single
|
|
or double quotes characters. The example below would be a bit tricky to
|
|
get right (or legible) without a variable reference::
|
|
|
|
- >>> html = u'''<html>
|
|
+ >>> html = '''<html>
|
|
... <body>
|
|
... <p>He said: "I don't know why, but I like mixing single and double quotes!"</p>
|
|
... </body>
|
|
diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py
|
|
index 747e808..3881736 100644
|
|
--- a/parsel/csstranslator.py
|
|
+++ b/parsel/csstranslator.py
|
|
@@ -1,7 +1,4 @@
|
|
-try:
|
|
- from functools import lru_cache
|
|
-except ImportError:
|
|
- from functools32 import lru_cache
|
|
+from functools import lru_cache
|
|
|
|
from cssselect import GenericTranslator as OriginalGenericTranslator
|
|
from cssselect import HTMLTranslator as OriginalHTMLTranslator
|
|
@@ -23,7 +20,7 @@ def from_xpath(cls, xpath, textnode=False, attribute=None):
|
|
return x
|
|
|
|
def __str__(self):
|
|
- path = super(XPathExpr, self).__str__()
|
|
+ path = super().__str__()
|
|
if self.textnode:
|
|
if path == '*':
|
|
path = 'text()'
|
|
@@ -40,20 +37,20 @@ def __str__(self):
|
|
return path
|
|
|
|
def join(self, combiner, other):
|
|
- super(XPathExpr, self).join(combiner, other)
|
|
+ super().join(combiner, other)
|
|
self.textnode = other.textnode
|
|
self.attribute = other.attribute
|
|
return self
|
|
|
|
|
|
-class TranslatorMixin(object):
|
|
+class TranslatorMixin:
|
|
"""This mixin adds support to CSS pseudo elements via dynamic dispatch.
|
|
|
|
Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``.
|
|
"""
|
|
|
|
def xpath_element(self, selector):
|
|
- xpath = super(TranslatorMixin, self).xpath_element(selector)
|
|
+ xpath = super().xpath_element(selector)
|
|
return XPathExpr.from_xpath(xpath)
|
|
|
|
def xpath_pseudo_element(self, xpath, pseudo_element):
|
|
@@ -98,13 +95,13 @@ def xpath_text_simple_pseudo_element(self, xpath):
|
|
class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
|
|
@lru_cache(maxsize=256)
|
|
def css_to_xpath(self, css, prefix='descendant-or-self::'):
|
|
- return super(GenericTranslator, self).css_to_xpath(css, prefix)
|
|
+ return super().css_to_xpath(css, prefix)
|
|
|
|
|
|
class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
|
|
@lru_cache(maxsize=256)
|
|
def css_to_xpath(self, css, prefix='descendant-or-self::'):
|
|
- return super(HTMLTranslator, self).css_to_xpath(css, prefix)
|
|
+ return super().css_to_xpath(css, prefix)
|
|
|
|
|
|
_translator = HTMLTranslator()
|
|
diff --git a/parsel/selector.py b/parsel/selector.py
|
|
index 504a4fe..b644e82 100644
|
|
--- a/parsel/selector.py
|
|
+++ b/parsel/selector.py
|
|
@@ -2,9 +2,6 @@
|
|
XPath selectors based on lxml
|
|
"""
|
|
|
|
-import sys
|
|
-
|
|
-import six
|
|
from lxml import etree, html
|
|
|
|
from .utils import flatten, iflatten, extract_regex, shorten
|
|
@@ -22,7 +19,7 @@ class CannotRemoveElementWithoutParent(Exception):
|
|
class SafeXMLParser(etree.XMLParser):
|
|
def __init__(self, *args, **kwargs):
|
|
kwargs.setdefault('resolve_entities', False)
|
|
- super(SafeXMLParser, self).__init__(*args, **kwargs)
|
|
+ super().__init__(*args, **kwargs)
|
|
|
|
|
|
_ctgroup = {
|
|
@@ -61,13 +58,8 @@ class SelectorList(list):
|
|
class, which provides a few additional methods.
|
|
"""
|
|
|
|
- # __getslice__ is deprecated but `list` builtin implements it only in Py2
|
|
- def __getslice__(self, i, j):
|
|
- o = super(SelectorList, self).__getslice__(i, j)
|
|
- return self.__class__(o)
|
|
-
|
|
def __getitem__(self, pos):
|
|
- o = super(SelectorList, self).__getitem__(pos)
|
|
+ o = super().__getitem__(pos)
|
|
return self.__class__(o) if isinstance(pos, slice) else o
|
|
|
|
def __getstate__(self):
|
|
@@ -164,7 +156,7 @@ def remove(self):
|
|
x.remove()
|
|
|
|
|
|
-class Selector(object):
|
|
+class Selector:
|
|
"""
|
|
:class:`Selector` allows you to select parts of an XML or HTML text using CSS
|
|
or XPath expressions and extract data from it.
|
|
@@ -204,9 +196,10 @@ def __init__(self, text=None, type=None, namespaces=None, root=None,
|
|
self._tostring_method = _ctgroup[st]['_tostring_method']
|
|
|
|
if text is not None:
|
|
- if not isinstance(text, six.text_type):
|
|
- msg = "text argument should be of type %s, got %s" % (
|
|
- six.text_type, text.__class__)
|
|
+ if not isinstance(text, str):
|
|
+ msg = "text argument should be of type str, got %s" % (
|
|
+ text.__class__
|
|
+ )
|
|
raise TypeError(msg)
|
|
root = self._get_root(text, base_url)
|
|
elif root is None:
|
|
@@ -255,9 +248,7 @@ def xpath(self, query, namespaces=None, **kwargs):
|
|
smart_strings=self._lxml_smart_strings,
|
|
**kwargs)
|
|
except etree.XPathError as exc:
|
|
- msg = u"XPath error: %s in %s" % (exc, query)
|
|
- msg = msg if six.PY3 else msg.encode('unicode_escape')
|
|
- six.reraise(ValueError, ValueError(msg), sys.exc_info()[2])
|
|
+ raise ValueError("XPath error: %s in %s" % (exc, query))
|
|
|
|
if type(result) is not list:
|
|
result = [result]
|
|
@@ -324,11 +315,11 @@ def get(self):
|
|
with_tail=False)
|
|
except (AttributeError, TypeError):
|
|
if self.root is True:
|
|
- return u'1'
|
|
+ return '1'
|
|
elif self.root is False:
|
|
- return u'0'
|
|
+ return '0'
|
|
else:
|
|
- return six.text_type(self.root)
|
|
+ return str(self.root)
|
|
extract = get
|
|
|
|
def getall(self):
|
|
@@ -354,7 +345,7 @@ def remove_namespaces(self):
|
|
if el.tag.startswith('{'):
|
|
el.tag = el.tag.split('}', 1)[1]
|
|
# loop on element attributes also
|
|
- for an in el.attrib.keys():
|
|
+ for an in el.attrib:
|
|
if an.startswith('{'):
|
|
el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an)
|
|
# remove namespace declarations
|
|
diff --git a/parsel/utils.py b/parsel/utils.py
|
|
index 6914362..6aeff6f 100644
|
|
--- a/parsel/utils.py
|
|
+++ b/parsel/utils.py
|
|
@@ -1,5 +1,4 @@
|
|
import re
|
|
-import six
|
|
from w3lib.html import replace_entities as w3lib_replace_entities
|
|
|
|
|
|
@@ -50,10 +49,10 @@ def _is_listlike(x):
|
|
True
|
|
>>> _is_listlike((x for x in range(3)))
|
|
True
|
|
- >>> _is_listlike(six.moves.xrange(5))
|
|
+ >>> _is_listlike(range(5))
|
|
True
|
|
"""
|
|
- return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
|
|
+ return hasattr(x, "__iter__") and not isinstance(x, (str, bytes))
|
|
|
|
|
|
def extract_regex(regex, text, replace_entities=True):
|
|
@@ -62,7 +61,7 @@ def extract_regex(regex, text, replace_entities=True):
|
|
* if the regex contains multiple numbered groups, all those will be returned (flattened)
|
|
* if the regex doesn't contain any group the entire regex matching is returned
|
|
"""
|
|
- if isinstance(regex, six.string_types):
|
|
+ if isinstance(regex, str):
|
|
regex = re.compile(regex, re.UNICODE)
|
|
|
|
if 'extract' in regex.groupindex:
|
|
diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
|
|
index 95b07ba..ceb8eaf 100644
|
|
--- a/parsel/xpathfuncs.py
|
|
+++ b/parsel/xpathfuncs.py
|
|
@@ -1,8 +1,6 @@
|
|
import re
|
|
from lxml import etree
|
|
|
|
-from six import string_types
|
|
-
|
|
from w3lib.html import HTML5_WHITESPACE
|
|
|
|
regex = '[{}]+'.format(HTML5_WHITESPACE)
|
|
@@ -45,7 +43,7 @@ def has_class(context, *classes):
|
|
raise ValueError(
|
|
'XPath error: has-class must have at least 1 argument')
|
|
for c in classes:
|
|
- if not isinstance(c, string_types):
|
|
+ if not isinstance(c, str):
|
|
raise ValueError(
|
|
'XPath error: has-class arguments must be strings')
|
|
context.eval_context['args_checked'] = True
|
|
diff --git a/setup.py b/setup.py
|
|
index d14ad0e..ade049f 100644
|
|
--- a/setup.py
|
|
+++ b/setup.py
|
|
@@ -1,9 +1,5 @@
|
|
#!/usr/bin/env python
|
|
-# -*- coding: utf-8 -*-
|
|
|
|
-import sys
|
|
-
|
|
-from pkg_resources import parse_version
|
|
from setuptools import setup, __version__ as setuptools_version
|
|
|
|
|
|
@@ -13,32 +9,6 @@
|
|
with open('NEWS') as history_file:
|
|
history = history_file.read().replace('.. :changelog:', '')
|
|
|
|
-test_requirements = [
|
|
-]
|
|
-
|
|
-def has_environment_marker_platform_impl_support():
|
|
- """Code extracted from 'pytest/setup.py'
|
|
- https://github.com/pytest-dev/pytest/blob/7538680c/setup.py#L31
|
|
- The first known release to support environment marker with range operators
|
|
- it is 18.5, see:
|
|
- https://setuptools.readthedocs.io/en/latest/history.html#id235
|
|
- """
|
|
- return parse_version(setuptools_version) >= parse_version('18.5')
|
|
-
|
|
-install_requires = [
|
|
- 'w3lib>=1.19.0',
|
|
- 'lxml',
|
|
- 'six>=1.6.0',
|
|
- 'cssselect>=0.9'
|
|
-]
|
|
-extras_require = {}
|
|
-
|
|
-if not has_environment_marker_platform_impl_support():
|
|
- if sys.version_info[0:2] < (3, 0):
|
|
- install_requires.append("functools32")
|
|
-else:
|
|
- extras_require[":python_version<'3.0'"] = ["functools32"]
|
|
-
|
|
setup(
|
|
name='parsel',
|
|
version='1.6.0',
|
|
@@ -50,11 +20,16 @@ def has_environment_marker_platform_impl_support():
|
|
packages=[
|
|
'parsel',
|
|
],
|
|
- package_dir={'parsel':
|
|
- 'parsel'},
|
|
+ package_dir={
|
|
+ 'parsel': 'parsel',
|
|
+ },
|
|
include_package_data=True,
|
|
- install_requires=install_requires,
|
|
- extras_require=extras_require,
|
|
+ install_requires=[
|
|
+ 'cssselect>=0.9',
|
|
+ 'lxml',
|
|
+ 'w3lib>=1.19.0',
|
|
+ ],
|
|
+ python_requires='>=3.6',
|
|
license="BSD",
|
|
zip_safe=False,
|
|
keywords='parsel',
|
|
@@ -66,13 +41,11 @@ def has_environment_marker_platform_impl_support():
|
|
'Topic :: Text Processing :: Markup',
|
|
'Topic :: Text Processing :: Markup :: HTML',
|
|
'Topic :: Text Processing :: Markup :: XML',
|
|
- 'Programming Language :: Python :: 2',
|
|
- 'Programming Language :: Python :: 2.7',
|
|
'Programming Language :: Python :: 3',
|
|
- 'Programming Language :: Python :: 3.5',
|
|
'Programming Language :: Python :: 3.6',
|
|
'Programming Language :: Python :: 3.7',
|
|
'Programming Language :: Python :: 3.8',
|
|
+ 'Programming Language :: Python :: 3.9',
|
|
'Programming Language :: Python :: Implementation :: CPython',
|
|
'Programming Language :: Python :: Implementation :: PyPy',
|
|
],
|
|
diff --git a/tests/test_selector.py b/tests/test_selector.py
|
|
index a5c61f6..f5c60ae 100644
|
|
--- a/tests/test_selector.py
|
|
+++ b/tests/test_selector.py
|
|
@@ -1,7 +1,5 @@
|
|
-# -*- coding: utf-8 -*-
|
|
import re
|
|
import weakref
|
|
-import six
|
|
import unittest
|
|
import pickle
|
|
|
|
@@ -17,11 +15,11 @@ class SelectorTestCase(unittest.TestCase):
|
|
sscls = Selector
|
|
|
|
def test_pickle_selector(self):
|
|
- sel = self.sscls(text=u'<html><body><p>some text</p></body></html>')
|
|
+ sel = self.sscls(text='<html><body><p>some text</p></body></html>')
|
|
self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel)
|
|
|
|
def test_pickle_selector_list(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li')
|
|
empty_sel_list = sel.css('p')
|
|
self.assertIsInstance(sel_list, self.sscls.selectorlist_cls)
|
|
@@ -31,7 +29,7 @@ def test_pickle_selector_list(self):
|
|
|
|
def test_simple_selection(self):
|
|
"""Simple selector tests"""
|
|
- body = u"<p><input name='a'value='1'/><input name='b'value='2'/></p>"
|
|
+ body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>"
|
|
sel = self.sscls(text=body)
|
|
|
|
xl = sel.xpath('//input')
|
|
@@ -43,48 +41,48 @@ def test_simple_selection(self):
|
|
[x.extract() for x in sel.xpath('//input')])
|
|
|
|
self.assertEqual([x.extract() for x in sel.xpath("//input[@name='a']/@name")],
|
|
- [u'a'])
|
|
+ ['a'])
|
|
self.assertEqual([x.extract() for x in sel.xpath("number(concat(//input[@name='a']/@value, //input[@name='b']/@value))")],
|
|
- [u'12.0'])
|
|
+ ['12.0'])
|
|
|
|
self.assertEqual(sel.xpath("concat('xpath', 'rules')").extract(),
|
|
- [u'xpathrules'])
|
|
+ ['xpathrules'])
|
|
self.assertEqual([x.extract() for x in sel.xpath("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
|
|
- [u'12'])
|
|
+ ['12'])
|
|
|
|
def test_simple_selection_with_variables(self):
|
|
"""Using XPath variables"""
|
|
- body = u"<p><input name='a' value='1'/><input name='b' value='2'/></p>"
|
|
+ body = "<p><input name='a' value='1'/><input name='b' value='2'/></p>"
|
|
sel = self.sscls(text=body)
|
|
|
|
self.assertEqual([x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)],
|
|
- [u'a'])
|
|
+ ['a'])
|
|
self.assertEqual([x.extract() for x in sel.xpath("//input[@name=$letter]/@value", letter='b')],
|
|
- [u'2'])
|
|
+ ['2'])
|
|
|
|
self.assertEqual(sel.xpath("count(//input[@value=$number or @name=$letter])",
|
|
number=2, letter='a').extract(),
|
|
- [u'2.0'])
|
|
+ ['2.0'])
|
|
|
|
# you can also pass booleans
|
|
self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test",
|
|
cnt=2, test=True).extract(),
|
|
- [u'1'])
|
|
+ ['1'])
|
|
self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test",
|
|
cnt=4, test=True).extract(),
|
|
- [u'0'])
|
|
+ ['0'])
|
|
self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test",
|
|
cnt=4, test=False).extract(),
|
|
- [u'1'])
|
|
+ ['1'])
|
|
|
|
# for named nodes, you need to use "name()=node_name"
|
|
self.assertEqual(sel.xpath("boolean(count(//*[name()=$tag])=$cnt)=$test",
|
|
tag="input", cnt=2, test=True).extract(),
|
|
- [u'1'])
|
|
+ ['1'])
|
|
|
|
def test_simple_selection_with_variables_escape_friendly(self):
|
|
"""Using XPath variables with quotes that would need escaping with string formatting"""
|
|
- body = u"""<p>I'm mixing single and <input name='a' value='I say "Yeah!"'/>
|
|
+ body = """<p>I'm mixing single and <input name='a' value='I say "Yeah!"'/>
|
|
"double quotes" and I don't care :)</p>"""
|
|
sel = self.sscls(text=body)
|
|
|
|
@@ -95,7 +93,7 @@ def test_simple_selection_with_variables_escape_friendly(self):
|
|
|
|
# with XPath variables, escaping is done for you
|
|
self.assertEqual([x.extract() for x in sel.xpath("//input[@value=$text]/@name", text=t)],
|
|
- [u'a'])
|
|
+ ['a'])
|
|
lt = """I'm mixing single and "double quotes" and I don't care :)"""
|
|
# the following gives you something like
|
|
# ValueError: XPath error: Invalid predicate in //p[normalize-space()='I'm mixing single and "double quotes" and I don't care :)']//@name
|
|
@@ -103,10 +101,10 @@ def test_simple_selection_with_variables_escape_friendly(self):
|
|
|
|
self.assertEqual([x.extract() for x in sel.xpath("//p[normalize-space()=$lng]//@name",
|
|
lng=lt)],
|
|
- [u'a'])
|
|
+ ['a'])
|
|
|
|
def test_accessing_attributes(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<html lang="en" version="1.0">
|
|
<body>
|
|
<ul id="some-list" class="list-cls" class="list-cls">
|
|
@@ -134,12 +132,10 @@ def test_accessing_attributes(self):
|
|
[e.attrib for e in sel.css('li')])
|
|
|
|
def test_representation_slice(self):
|
|
- body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
|
|
+ body = "<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
|
|
sel = self.sscls(text=body)
|
|
|
|
representation = "<Selector xpath='//input/@name' data='{}...'>".format(37 * 'b')
|
|
- if six.PY2:
|
|
- representation = "<Selector xpath='//input/@name' data=u'{}...'>".format(37 * 'b')
|
|
|
|
self.assertEqual(
|
|
[repr(it) for it in sel.xpath('//input/@name')],
|
|
@@ -147,25 +143,27 @@ def test_representation_slice(self):
|
|
)
|
|
|
|
def test_representation_unicode_query(self):
|
|
- body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
|
|
+ body = "<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
|
|
|
|
representation = '<Selector xpath=\'//input[@value="©"]/@value\' data=\'©\'>'
|
|
- if six.PY2:
|
|
- representation = "<Selector xpath=u'//input[@value=\"\\xa9\"]/@value' data=u'\\xa9'>"
|
|
|
|
sel = self.sscls(text=body)
|
|
self.assertEqual(
|
|
- [repr(it) for it in sel.xpath(u'//input[@value="\xa9"]/@value')],
|
|
+ [repr(it) for it in sel.xpath('//input[@value="\xa9"]/@value')],
|
|
[representation]
|
|
)
|
|
|
|
def test_check_text_argument_type(self):
|
|
- self.assertRaisesRegexp(TypeError, 'text argument should be of type',
|
|
- self.sscls, b'<html/>')
|
|
+ self.assertRaisesRegex(
|
|
+ TypeError,
|
|
+ 'text argument should be of type',
|
|
+ self.sscls,
|
|
+ b'<html/>',
|
|
+ )
|
|
|
|
def test_extract_first(self):
|
|
"""Test if extract_first() returns first element"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
self.assertEqual(sel.xpath('//ul/li/text()').extract_first(),
|
|
@@ -181,38 +179,38 @@ def test_extract_first(self):
|
|
|
|
def test_extract_first_default(self):
|
|
"""Test if extract_first() returns default value when no results found"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
self.assertEqual(sel.xpath('//div/text()').extract_first(default='missing'), 'missing')
|
|
|
|
def test_selector_get_alias(self):
|
|
"""Test if get() returns extracted value on a Selector"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
- self.assertEqual(sel.xpath('//ul/li[position()>1]')[0].get(), u'<li id="2">2</li>')
|
|
- self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), u'2')
|
|
+ self.assertEqual(sel.xpath('//ul/li[position()>1]')[0].get(), '<li id="2">2</li>')
|
|
+ self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), '2')
|
|
|
|
def test_selector_getall_alias(self):
|
|
"""Test if get() returns extracted value on a Selector"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
- self.assertListEqual(sel.xpath('//ul/li[position()>1]')[0].getall(), [u'<li id="2">2</li>'])
|
|
- self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), [u'2'])
|
|
+ self.assertListEqual(sel.xpath('//ul/li[position()>1]')[0].getall(), ['<li id="2">2</li>'])
|
|
+ self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), ['2'])
|
|
|
|
def test_selectorlist_get_alias(self):
|
|
"""Test if get() returns first element for a selection call"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
- self.assertEqual(sel.xpath('//ul/li').get(), u'<li id="1">1</li>')
|
|
- self.assertEqual(sel.xpath('//ul/li/text()').get(), u'1')
|
|
+ self.assertEqual(sel.xpath('//ul/li').get(), '<li id="1">1</li>')
|
|
+ self.assertEqual(sel.xpath('//ul/li/text()').get(), '1')
|
|
|
|
def test_re_first(self):
|
|
"""Test if re_first() returns first matched element"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
self.assertEqual(sel.xpath('//ul/li/text()').re_first(r'\d'),
|
|
@@ -233,71 +231,73 @@ def test_re_first(self):
|
|
|
|
def test_extract_first_re_default(self):
|
|
"""Test if re_first() returns default value when no results found"""
|
|
- body = u'<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
+ body = '<ul><li id="1">1</li><li id="2">2</li></ul>'
|
|
sel = self.sscls(text=body)
|
|
|
|
self.assertEqual(sel.xpath('//div/text()').re_first(r'\w+', default='missing'), 'missing')
|
|
self.assertEqual(sel.xpath('/ul/li/text()').re_first(r'\w+', default='missing'), 'missing')
|
|
|
|
def test_select_unicode_query(self):
|
|
- body = u"<p><input name='\xa9' value='1'/></p>"
|
|
+ body = "<p><input name='\xa9' value='1'/></p>"
|
|
sel = self.sscls(text=body)
|
|
- self.assertEqual(sel.xpath(u'//input[@name="\xa9"]/@value').extract(), [u'1'])
|
|
+ self.assertEqual(sel.xpath('//input[@name="\xa9"]/@value').extract(), ['1'])
|
|
|
|
def test_list_elements_type(self):
|
|
"""Test Selector returning the same type in selection methods"""
|
|
- text = u'<p>test<p>'
|
|
+ text = '<p>test<p>'
|
|
assert isinstance(self.sscls(text=text).xpath("//p")[0], self.sscls)
|
|
assert isinstance(self.sscls(text=text).css("p")[0], self.sscls)
|
|
|
|
def test_boolean_result(self):
|
|
- body = u"<p><input name='a'value='1'/><input name='b'value='2'/></p>"
|
|
+ body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>"
|
|
xs = self.sscls(text=body)
|
|
- self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), [u'1'])
|
|
- self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), [u'0'])
|
|
+ self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ['1'])
|
|
+ self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), ['0'])
|
|
|
|
def test_differences_parsing_xml_vs_html(self):
|
|
"""Test that XML and HTML Selector's behave differently"""
|
|
# some text which is parsed differently by XML and HTML flavors
|
|
- text = u'<div><img src="a.jpg"><p>Hello</div>'
|
|
+ text = '<div><img src="a.jpg"><p>Hello</div>'
|
|
hs = self.sscls(text=text, type='html')
|
|
self.assertEqual(hs.xpath("//div").extract(),
|
|
- [u'<div><img src="a.jpg"><p>Hello</p></div>'])
|
|
+ ['<div><img src="a.jpg"><p>Hello</p></div>'])
|
|
|
|
xs = self.sscls(text=text, type='xml')
|
|
self.assertEqual(xs.xpath("//div").extract(),
|
|
- [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
|
|
+ ['<div><img src="a.jpg"><p>Hello</p></img></div>'])
|
|
|
|
def test_error_for_unknown_selector_type(self):
|
|
- self.assertRaises(ValueError, self.sscls, text=u'', type='_na_')
|
|
+ self.assertRaises(ValueError, self.sscls, text='', type='_na_')
|
|
|
|
def test_text_or_root_is_required(self):
|
|
- self.assertRaisesRegexp(ValueError,
|
|
- 'Selector needs either text or root argument',
|
|
- self.sscls)
|
|
+ self.assertRaisesRegex(
|
|
+ ValueError,
|
|
+ 'Selector needs either text or root argument',
|
|
+ self.sscls,
|
|
+ )
|
|
|
|
def test_bool(self):
|
|
- text = u'<a href="" >false</a><a href="nonempty">true</a>'
|
|
+ text = '<a href="" >false</a><a href="nonempty">true</a>'
|
|
hs = self.sscls(text=text, type='html')
|
|
falsish = hs.xpath('//a/@href')[0]
|
|
- self.assertEqual(falsish.extract(), u'')
|
|
+ self.assertEqual(falsish.extract(), '')
|
|
self.assertFalse(falsish)
|
|
trueish = hs.xpath('//a/@href')[1]
|
|
- self.assertEqual(trueish.extract(), u'nonempty')
|
|
+ self.assertEqual(trueish.extract(), 'nonempty')
|
|
self.assertTrue(trueish)
|
|
|
|
def test_slicing(self):
|
|
- text = u'<div><p>1</p><p>2</p><p>3</p></div>'
|
|
+ text = '<div><p>1</p><p>2</p><p>3</p></div>'
|
|
hs = self.sscls(text=text, type='html')
|
|
self.assertIsInstance(hs.css('p')[2], self.sscls)
|
|
self.assertIsInstance(hs.css('p')[2:3], self.sscls.selectorlist_cls)
|
|
self.assertIsInstance(hs.css('p')[:2], self.sscls.selectorlist_cls)
|
|
- self.assertEqual(hs.css('p')[2:3].extract(), [u'<p>3</p>'])
|
|
- self.assertEqual(hs.css('p')[1:3].extract(), [u'<p>2</p>', u'<p>3</p>'])
|
|
+ self.assertEqual(hs.css('p')[2:3].extract(), ['<p>3</p>'])
|
|
+ self.assertEqual(hs.css('p')[1:3].extract(), ['<p>2</p>', '<p>3</p>'])
|
|
|
|
def test_nested_selectors(self):
|
|
"""Nested selector tests"""
|
|
- body = u"""<body>
|
|
+ body = """<body>
|
|
<div class='one'>
|
|
<ul>
|
|
<li>one</li><li>two</li>
|
|
@@ -322,7 +322,7 @@ def test_nested_selectors(self):
|
|
|
|
def test_selectorlist_getall_alias(self):
|
|
"""Nested selector tests using getall()"""
|
|
- body = u"""<body>
|
|
+ body = """<body>
|
|
<div class='one'>
|
|
<ul>
|
|
<li>one</li><li>two</li>
|
|
@@ -346,20 +346,20 @@ def test_selectorlist_getall_alias(self):
|
|
self.assertEqual(divtwo.xpath("./li").getall(), [])
|
|
|
|
def test_mixed_nested_selectors(self):
|
|
- body = u'''<body>
|
|
+ body = '''<body>
|
|
<div id=1>not<span>me</span></div>
|
|
<div class="dos"><p>text</p><a href='#'>foo</a></div>
|
|
</body>'''
|
|
sel = self.sscls(text=body)
|
|
- self.assertEqual(sel.xpath('//div[@id="1"]').css('span::text').extract(), [u'me'])
|
|
- self.assertEqual(sel.css('#1').xpath('./span/text()').extract(), [u'me'])
|
|
+ self.assertEqual(sel.xpath('//div[@id="1"]').css('span::text').extract(), ['me'])
|
|
+ self.assertEqual(sel.css('#1').xpath('./span/text()').extract(), ['me'])
|
|
|
|
def test_dont_strip(self):
|
|
- sel = self.sscls(text=u'<div>fff: <a href="#">zzz</a></div>')
|
|
- self.assertEqual(sel.xpath("//text()").extract(), [u'fff: ', u'zzz'])
|
|
+ sel = self.sscls(text='<div>fff: <a href="#">zzz</a></div>')
|
|
+ self.assertEqual(sel.xpath("//text()").extract(), ['fff: ', 'zzz'])
|
|
|
|
def test_namespaces_simple(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<test xmlns:somens="http://scrapy.org">
|
|
<somens:a id="foo">take this</a>
|
|
<a id="bar">found</a>
|
|
@@ -370,10 +370,10 @@ def test_namespaces_simple(self):
|
|
|
|
x.register_namespace("somens", "http://scrapy.org")
|
|
self.assertEqual(x.xpath("//somens:a/text()").extract(),
|
|
- [u'take this'])
|
|
+ ['take this'])
|
|
|
|
def test_namespaces_adhoc(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<test xmlns:somens="http://scrapy.org">
|
|
<somens:a id="foo">take this</a>
|
|
<a id="bar">found</a>
|
|
@@ -384,10 +384,10 @@ def test_namespaces_adhoc(self):
|
|
|
|
self.assertEqual(x.xpath("//somens:a/text()",
|
|
namespaces={"somens": "http://scrapy.org"}).extract(),
|
|
- [u'take this'])
|
|
+ ['take this'])
|
|
|
|
def test_namespaces_adhoc_variables(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<test xmlns:somens="http://scrapy.org">
|
|
<somens:a id="foo">take this</a>
|
|
<a id="bar">found</a>
|
|
@@ -399,10 +399,10 @@ def test_namespaces_adhoc_variables(self):
|
|
self.assertEqual(x.xpath("//somens:a/following-sibling::a[@id=$identifier]/text()",
|
|
namespaces={"somens": "http://scrapy.org"},
|
|
identifier="bar").extract(),
|
|
- [u'found'])
|
|
+ ['found'])
|
|
|
|
def test_namespaces_multiple(self):
|
|
- body = u"""<?xml version="1.0" encoding="UTF-8"?>
|
|
+ body = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<BrowseNode xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05"
|
|
xmlns:b="http://somens.com"
|
|
xmlns:p="http://www.scrapy.org/product" >
|
|
@@ -423,7 +423,7 @@ def test_namespaces_multiple(self):
|
|
self.assertEqual(x.xpath("//p:SecondTestTag/xmlns:material/text()").extract()[0], 'iron')
|
|
|
|
def test_namespaces_multiple_adhoc(self):
|
|
- body = u"""<?xml version="1.0" encoding="UTF-8"?>
|
|
+ body = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<BrowseNode xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05"
|
|
xmlns:b="http://somens.com"
|
|
xmlns:p="http://www.scrapy.org/product" >
|
|
@@ -495,13 +495,13 @@ def test_namespaces_multiple_adhoc(self):
|
|
self.assertEqual(x.xpath("//p:SecondTestTag/xmlns:material/text()").extract()[0], 'iron')
|
|
|
|
def test_make_links_absolute(self):
|
|
- text = u'<a href="file.html">link to file</a>'
|
|
+ text = '<a href="file.html">link to file</a>'
|
|
sel = Selector(text=text, base_url='http://example.com')
|
|
sel.root.make_links_absolute()
|
|
- self.assertEqual(u'http://example.com/file.html', sel.xpath('//a/@href').extract_first())
|
|
+ self.assertEqual('http://example.com/file.html', sel.xpath('//a/@href').extract_first())
|
|
|
|
def test_re(self):
|
|
- body = u"""<div>Name: Mary
|
|
+ body = """<div>Name: Mary
|
|
<ul>
|
|
<li>Name: John</li>
|
|
<li>Age: 10</li>
|
|
@@ -519,23 +519,23 @@ def test_re(self):
|
|
["10", "20"])
|
|
|
|
# Test named group, hit and miss
|
|
- x = self.sscls(text=u'foobar')
|
|
+ x = self.sscls(text='foobar')
|
|
self.assertEqual(x.re('(?P<extract>foo)'), ['foo'])
|
|
self.assertEqual(x.re('(?P<extract>baz)'), [])
|
|
|
|
# A purposely constructed test for an edge case
|
|
- x = self.sscls(text=u'baz')
|
|
+ x = self.sscls(text='baz')
|
|
self.assertEqual(x.re('(?P<extract>foo)|(?P<bar>baz)'), [])
|
|
|
|
def test_re_replace_entities(self):
|
|
- body = u"""<script>{"foo":"bar &amp; &quot;baz&quot;"}</script>"""
|
|
+ body = """<script>{"foo":"bar &amp; &quot;baz&quot;"}</script>"""
|
|
x = self.sscls(text=body)
|
|
|
|
name_re = re.compile('{"foo":(.*)}')
|
|
|
|
# by default, only &amp; and &lt; are preserved ;
|
|
# other entities are converted
|
|
- expected = u'"bar &amp; "baz""'
|
|
+ expected = '"bar &amp; "baz""'
|
|
self.assertEqual(x.xpath("//script/text()").re(name_re), [expected])
|
|
self.assertEqual(x.xpath("//script").re(name_re), [expected])
|
|
self.assertEqual(x.xpath("//script/text()")[0].re(name_re), [expected])
|
|
@@ -546,7 +546,7 @@ def test_re_replace_entities(self):
|
|
self.assertEqual(x.xpath("//script")[0].re_first(name_re), expected)
|
|
|
|
# switching off replace_entities will preserve &quot; also
|
|
- expected = u'"bar &amp; &quot;baz&quot;"'
|
|
+ expected = '"bar &amp; &quot;baz&quot;"'
|
|
self.assertEqual(x.xpath("//script/text()").re(name_re, replace_entities=False), [expected])
|
|
self.assertEqual(x.xpath("//script")[0].re(name_re, replace_entities=False), [expected])
|
|
|
|
@@ -554,95 +554,94 @@ def test_re_replace_entities(self):
|
|
self.assertEqual(x.xpath("//script")[0].re_first(name_re, replace_entities=False), expected)
|
|
|
|
def test_re_intl(self):
|
|
- body = u'<div>Evento: cumplea\xf1os</div>'
|
|
+ body = '<div>Evento: cumplea\xf1os</div>'
|
|
x = self.sscls(text=body)
|
|
- self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), [u'cumplea\xf1os'])
|
|
+ self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), ['cumplea\xf1os'])
|
|
|
|
def test_selector_over_text(self):
|
|
- hs = self.sscls(text=u'<root>lala</root>')
|
|
- self.assertEqual(hs.extract(), u'<html><body><root>lala</root></body></html>')
|
|
- xs = self.sscls(text=u'<root>lala</root>', type='xml')
|
|
- self.assertEqual(xs.extract(), u'<root>lala</root>')
|
|
- self.assertEqual(xs.xpath('.').extract(), [u'<root>lala</root>'])
|
|
+ hs = self.sscls(text='<root>lala</root>')
|
|
+ self.assertEqual(hs.extract(), '<html><body><root>lala</root></body></html>')
|
|
+ xs = self.sscls(text='<root>lala</root>', type='xml')
|
|
+ self.assertEqual(xs.extract(), '<root>lala</root>')
|
|
+ self.assertEqual(xs.xpath('.').extract(), ['<root>lala</root>'])
|
|
|
|
def test_invalid_xpath(self):
|
|
"Test invalid xpath raises ValueError with the invalid xpath"
|
|
- x = self.sscls(text=u"<html></html>")
|
|
+ x = self.sscls(text="<html></html>")
|
|
xpath = "//test[@foo='bar]"
|
|
- self.assertRaisesRegexp(ValueError, re.escape(xpath), x.xpath, xpath)
|
|
+ self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)
|
|
|
|
def test_invalid_xpath_unicode(self):
|
|
"Test *Unicode* invalid xpath raises ValueError with the invalid xpath"
|
|
- x = self.sscls(text=u"<html></html>")
|
|
- xpath = u"//test[@foo='\u0431ar]"
|
|
- encoded = xpath if six.PY3 else xpath.encode('unicode_escape')
|
|
- self.assertRaisesRegexp(ValueError, re.escape(encoded), x.xpath, xpath)
|
|
+ x = self.sscls(text="<html></html>")
|
|
+ xpath = "//test[@foo='\\u0431ar]"
|
|
+ self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath)
|
|
|
|
def test_http_header_encoding_precedence(self):
|
|
- # u'\xa3' = pound symbol in unicode
|
|
- # u'\xc2\xa3' = pound symbol in utf-8
|
|
- # u'\xa3' = pound symbol in latin-1 (iso-8859-1)
|
|
+ # '\xa3' = pound symbol in unicode
|
|
+ # '\xc2\xa3' = pound symbol in utf-8
|
|
+ # '\xa3' = pound symbol in latin-1 (iso-8859-1)
|
|
|
|
- text = u'''<html>
|
|
+ text = '''<html>
|
|
<head><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"></head>
|
|
<body><span id="blank">\xa3</span></body></html>'''
|
|
x = self.sscls(text=text)
|
|
self.assertEqual(x.xpath("//span[@id='blank']/text()").extract(),
|
|
- [u'\xa3'])
|
|
+ ['\xa3'])
|
|
|
|
def test_empty_bodies_shouldnt_raise_errors(self):
|
|
- self.sscls(text=u'').xpath('//text()').extract()
|
|
+ self.sscls(text='').xpath('//text()').extract()
|
|
|
|
def test_bodies_with_comments_only(self):
|
|
- sel = self.sscls(text=u'<!-- hello world -->', base_url='http://example.com')
|
|
- self.assertEqual(u'http://example.com', sel.root.base)
|
|
+ sel = self.sscls(text='<!-- hello world -->', base_url='http://example.com')
|
|
+ self.assertEqual('http://example.com', sel.root.base)
|
|
|
|
def test_null_bytes_shouldnt_raise_errors(self):
|
|
- text = u'<root>pre\x00post</root>'
|
|
+ text = '<root>pre\x00post</root>'
|
|
self.sscls(text).xpath('//text()').extract()
|
|
|
|
def test_replacement_char_from_badly_encoded_body(self):
|
|
# \xe9 alone isn't valid utf8 sequence
|
|
- text = u'<html><p>an Jos\ufffd de</p><html>'
|
|
- self.assertEqual([u'an Jos\ufffd de'],
|
|
+ text = '<html><p>an Jos\\ufffd de</p><html>'
|
|
+ self.assertEqual(['an Jos\\ufffd de'],
|
|
self.sscls(text).xpath('//text()').extract())
|
|
|
|
def test_select_on_unevaluable_nodes(self):
|
|
- r = self.sscls(text=u'<span class="big">some text</span>')
|
|
+ r = self.sscls(text='<span class="big">some text</span>')
|
|
# Text node
|
|
x1 = r.xpath('//text()')
|
|
- self.assertEqual(x1.extract(), [u'some text'])
|
|
+ self.assertEqual(x1.extract(), ['some text'])
|
|
self.assertEqual(x1.xpath('.//b').extract(), [])
|
|
# Tag attribute
|
|
x1 = r.xpath('//span/@class')
|
|
- self.assertEqual(x1.extract(), [u'big'])
|
|
+ self.assertEqual(x1.extract(), ['big'])
|
|
self.assertEqual(x1.xpath('.//text()').extract(), [])
|
|
|
|
def test_select_on_text_nodes(self):
|
|
- r = self.sscls(text=u'<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>')
|
|
+ r = self.sscls(text='<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>')
|
|
x1 = r.xpath("//div/descendant::text()[preceding-sibling::b[contains(text(), 'Options')]]")
|
|
- self.assertEqual(x1.extract(), [u'opt1'])
|
|
+ self.assertEqual(x1.extract(), ['opt1'])
|
|
|
|
x1 = r.xpath("//div/descendant::text()/preceding-sibling::b[contains(text(), 'Options')]")
|
|
- self.assertEqual(x1.extract(), [u'<b>Options:</b>'])
|
|
+ self.assertEqual(x1.extract(), ['<b>Options:</b>'])
|
|
|
|
@unittest.skip("Text nodes lost parent node reference in lxml")
|
|
def test_nested_select_on_text_nodes(self):
|
|
# FIXME: does not work with lxml backend [upstream]
|
|
- r = self.sscls(text=u'<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>')
|
|
+ r = self.sscls(text='<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>')
|
|
x1 = r.xpath("//div/descendant::text()")
|
|
x2 = x1.xpath("./preceding-sibling::b[contains(text(), 'Options')]")
|
|
- self.assertEqual(x2.extract(), [u'<b>Options:</b>'])
|
|
+ self.assertEqual(x2.extract(), ['<b>Options:</b>'])
|
|
|
|
def test_weakref_slots(self):
|
|
"""Check that classes are using slots and are weak-referenceable"""
|
|
- x = self.sscls(text=u'')
|
|
+ x = self.sscls(text='')
|
|
weakref.ref(x)
|
|
assert not hasattr(x, '__dict__'), "%s does not use __slots__" % \
|
|
x.__class__.__name__
|
|
|
|
def test_remove_namespaces(self):
|
|
- xml = u"""<?xml version="1.0" encoding="UTF-8"?>
|
|
+ xml = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/">
|
|
<link type="text/html"/>
|
|
<entry>
|
|
@@ -659,7 +658,7 @@ def test_remove_namespaces(self):
|
|
self.assertEqual(len(sel.xpath("./namespace::*")), 1)
|
|
|
|
def test_remove_namespaces_embedded(self):
|
|
- xml = u"""
|
|
+ xml = """
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<link type="text/html"/>
|
|
<entry>
|
|
@@ -686,7 +685,7 @@ def test_remove_namespaces_embedded(self):
|
|
self.assertEqual(len(sel.xpath("./namespace::*")), 1)
|
|
|
|
def test_remove_attributes_namespaces(self):
|
|
- xml = u"""<?xml version="1.0" encoding="UTF-8"?>
|
|
+ xml = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<feed xmlns:atom="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/">
|
|
<link atom:type="text/html"/>
|
|
<entry>
|
|
@@ -706,7 +705,7 @@ def test_smart_strings(self):
|
|
class SmartStringsSelector(Selector):
|
|
_lxml_smart_strings = True
|
|
|
|
- body = u"""<body>
|
|
+ body = """<body>
|
|
<div class='one'>
|
|
<ul>
|
|
<li>one</li><li>two</li>
|
|
@@ -723,18 +722,18 @@ class SmartStringsSelector(Selector):
|
|
# only when smart_strings are on
|
|
x = self.sscls(text=body)
|
|
li_text = x.xpath('//li/text()')
|
|
- self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), li_text)))
|
|
+ self.assertFalse(any([hasattr(e.root, 'getparent') for e in li_text]))
|
|
div_class = x.xpath('//div/@class')
|
|
- self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), div_class)))
|
|
+ self.assertFalse(any([hasattr(e.root, 'getparent') for e in div_class]))
|
|
|
|
x = SmartStringsSelector(text=body)
|
|
li_text = x.xpath('//li/text()')
|
|
- self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), li_text)))
|
|
+ self.assertTrue(all([hasattr(e.root, 'getparent') for e in li_text]))
|
|
div_class = x.xpath('//div/@class')
|
|
- self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), div_class)))
|
|
+ self.assertTrue(all([hasattr(e.root, 'getparent') for e in div_class]))
|
|
|
|
def test_xml_entity_expansion(self):
|
|
- malicious_xml = u'<?xml version="1.0" encoding="ISO-8859-1"?>'\
|
|
+ malicious_xml = '<?xml version="1.0" encoding="ISO-8859-1"?>'\
|
|
'<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM '\
|
|
'"file:///etc/passwd" >]><foo>&xxe;</foo>'
|
|
|
|
@@ -743,8 +742,8 @@ def test_xml_entity_expansion(self):
|
|
self.assertEqual(sel.extract(), '<foo>&xxe;</foo>')
|
|
|
|
def test_configure_base_url(self):
|
|
- sel = self.sscls(text=u'nothing', base_url='http://example.com')
|
|
- self.assertEqual(u'http://example.com', sel.root.base)
|
|
+ sel = self.sscls(text='nothing', base_url='http://example.com')
|
|
+ self.assertEqual('http://example.com', sel.root.base)
|
|
|
|
def test_extending_selector(self):
|
|
class MySelectorList(Selector.selectorlist_cls):
|
|
@@ -753,33 +752,33 @@ class MySelectorList(Selector.selectorlist_cls):
|
|
class MySelector(Selector):
|
|
selectorlist_cls = MySelectorList
|
|
|
|
- sel = MySelector(text=u'<html><div>foo</div></html>')
|
|
+ sel = MySelector(text='<html><div>foo</div></html>')
|
|
self.assertIsInstance(sel.xpath('//div'), MySelectorList)
|
|
self.assertIsInstance(sel.xpath('//div')[0], MySelector)
|
|
self.assertIsInstance(sel.css('div'), MySelectorList)
|
|
self.assertIsInstance(sel.css('div')[0], MySelector)
|
|
|
|
def test_replacement_null_char_from_body(self):
|
|
- text = u'<html>\x00<body><p>Grainy</p></body></html>'
|
|
- self.assertEqual(u'<html><body><p>Grainy</p></body></html>',
|
|
+ text = '<html>\x00<body><p>Grainy</p></body></html>'
|
|
+ self.assertEqual('<html><body><p>Grainy</p></body></html>',
|
|
self.sscls(text).extract())
|
|
|
|
def test_remove_selector_list(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li')
|
|
sel_list.remove()
|
|
self.assertIsInstance(sel.css('li'), self.sscls.selectorlist_cls)
|
|
self.assertEqual(sel.css('li'), [])
|
|
|
|
def test_remove_selector(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li')
|
|
sel_list[0].remove()
|
|
self.assertIsInstance(sel.css('li'), self.sscls.selectorlist_cls)
|
|
self.assertEqual(sel.css('li::text').getall(), ['2', '3'])
|
|
|
|
def test_remove_pseudo_element_selector_list(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li::text')
|
|
self.assertEqual(sel_list.getall(), ['1', '2', '3'])
|
|
with self.assertRaises(CannotRemoveElementWithoutRoot):
|
|
@@ -789,7 +788,7 @@ def test_remove_pseudo_element_selector_list(self):
|
|
self.assertEqual(sel.css('li::text').getall(), ['1', '2', '3'])
|
|
|
|
def test_remove_pseudo_element_selector(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li::text')
|
|
self.assertEqual(sel_list.getall(), ['1', '2', '3'])
|
|
with self.assertRaises(CannotRemoveElementWithoutRoot):
|
|
@@ -799,7 +798,7 @@ def test_remove_pseudo_element_selector(self):
|
|
self.assertEqual(sel.css('li::text').getall(), ['1', '2', '3'])
|
|
|
|
def test_remove_root_element_selector(self):
|
|
- sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
+ sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>')
|
|
sel_list = sel.css('li::text')
|
|
self.assertEqual(sel_list.getall(), ['1', '2', '3'])
|
|
with self.assertRaises(CannotRemoveElementWithoutParent):
|
|
@@ -821,7 +820,7 @@ class ExsltTestCase(unittest.TestCase):
|
|
|
|
def test_regexp(self):
|
|
"""EXSLT regular expression tests"""
|
|
- body = u"""
|
|
+ body = """
|
|
<p><input name='a' value='1'/><input name='b' value='2'/></p>
|
|
<div class="links">
|
|
<a href="/first.html">first link</a>
|
|
@@ -840,43 +839,43 @@ def test_regexp(self):
|
|
[x.extract()
|
|
for x in sel.xpath(
|
|
r'//a[re:test(@href, "\.html$")]/text()')],
|
|
- [u'first link', u'second link'])
|
|
+ ['first link', 'second link'])
|
|
self.assertEqual(
|
|
[x.extract()
|
|
for x in sel.xpath(
|
|
'//a[re:test(@href, "first")]/text()')],
|
|
- [u'first link'])
|
|
+ ['first link'])
|
|
self.assertEqual(
|
|
[x.extract()
|
|
for x in sel.xpath(
|
|
'//a[re:test(@href, "second")]/text()')],
|
|
- [u'second link'])
|
|
+ ['second link'])
|
|
|
|
# re:match() is rather special: it returns a node-set of <match> nodes
|
|
- # [u'<match>http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml</match>',
|
|
- # u'<match>http</match>',
|
|
- # u'<match>www.bayes.co.uk</match>',
|
|
- # u'<match></match>',
|
|
- # u'<match>/xml/index.xml?/xml/utils/rechecker.xml</match>']
|
|
+ # ['<match>http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml</match>',
|
|
+ # '<match>http</match>',
|
|
+ # '<match>www.bayes.co.uk</match>',
|
|
+ # '<match></match>',
|
|
+ # '<match>/xml/index.xml?/xml/utils/rechecker.xml</match>']
|
|
self.assertEqual(
|
|
sel.xpath(r're:match(//a[re:test(@href, "\.xml$")]/@href,'
|
|
r'"(\w+):\/\/([^/:]+)(:\d*)?([^# ]*)")/text()').extract(),
|
|
- [u'http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml',
|
|
- u'http',
|
|
- u'www.bayes.co.uk',
|
|
- u'',
|
|
- u'/xml/index.xml?/xml/utils/rechecker.xml'])
|
|
+ ['http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml',
|
|
+ 'http',
|
|
+ 'www.bayes.co.uk',
|
|
+ '',
|
|
+ '/xml/index.xml?/xml/utils/rechecker.xml'])
|
|
|
|
# re:replace()
|
|
self.assertEqual(
|
|
sel.xpath(r're:replace(//a[re:test(@href, "\.xml$")]/@href,'
|
|
r'"(\w+)://(.+)(\.xml)", "","https://\2.html")').extract(),
|
|
- [u'https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html'])
|
|
+ ['https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html'])
|
|
|
|
def test_set(self):
|
|
"""EXSLT set manipulation tests"""
|
|
# microdata example from http://schema.org/Event
|
|
- body = u"""
|
|
+ body = """
|
|
<div itemscope itemtype="http://schema.org/Event">
|
|
<a itemprop="url" href="nba-miami-philidelphia-game3.html">
|
|
NBA Eastern Conference First Round Playoff Tickets:
|
|
@@ -908,17 +907,17 @@ def test_set(self):
|
|
self.assertEqual(
|
|
sel.xpath('''//div[@itemtype="http://schema.org/Event"]
|
|
//@itemprop''').extract(),
|
|
- [u'url',
|
|
- u'name',
|
|
- u'startDate',
|
|
- u'location',
|
|
- u'url',
|
|
- u'address',
|
|
- u'addressLocality',
|
|
- u'addressRegion',
|
|
- u'offers',
|
|
- u'lowPrice',
|
|
- u'offerCount']
|
|
+ ['url',
|
|
+ 'name',
|
|
+ 'startDate',
|
|
+ 'location',
|
|
+ 'url',
|
|
+ 'address',
|
|
+ 'addressLocality',
|
|
+ 'addressRegion',
|
|
+ 'offers',
|
|
+ 'lowPrice',
|
|
+ 'offerCount']
|
|
)
|
|
|
|
self.assertEqual(sel.xpath('''
|
|
@@ -926,4 +925,4 @@ def test_set(self):
|
|
//@itemprop,
|
|
//div[@itemtype="http://schema.org/Event"]
|
|
//*[@itemscope]/*/@itemprop)''').extract(),
|
|
- [u'url', u'name', u'startDate', u'location', u'offers'])
|
|
+ ['url', 'name', 'startDate', 'location', 'offers'])
|
|
diff --git a/tests/test_selector_csstranslator.py b/tests/test_selector_csstranslator.py
|
|
index 83ed066..ae9ffc0 100644
|
|
--- a/tests/test_selector_csstranslator.py
|
|
+++ b/tests/test_selector_csstranslator.py
|
|
@@ -8,7 +8,7 @@
|
|
from cssselect.xpath import ExpressionError
|
|
|
|
|
|
-HTMLBODY = u'''
|
|
+HTMLBODY = '''
|
|
<html>
|
|
<body>
|
|
<div>
|
|
@@ -52,10 +52,10 @@ def setUp(self):
|
|
|
|
def test_attr_function(self):
|
|
cases = [
|
|
- ('::attr(name)', u'descendant-or-self::*/@name'),
|
|
- ('a::attr(href)', u'descendant-or-self::a/@href'),
|
|
- ('a ::attr(img)', u'descendant-or-self::a/descendant-or-self::*/@img'),
|
|
- ('a > ::attr(class)', u'descendant-or-self::a/*/@class'),
|
|
+ ('::attr(name)', 'descendant-or-self::*/@name'),
|
|
+ ('a::attr(href)', 'descendant-or-self::a/@href'),
|
|
+ ('a ::attr(img)', 'descendant-or-self::a/descendant-or-self::*/@img'),
|
|
+ ('a > ::attr(class)', 'descendant-or-self::a/*/@class'),
|
|
]
|
|
for css, xpath in cases:
|
|
self.assertEqual(self.c2x(css), xpath, css)
|
|
@@ -71,17 +71,17 @@ def test_attr_function_exception(self):
|
|
|
|
def test_text_pseudo_element(self):
|
|
cases = [
|
|
- ('::text', u'descendant-or-self::text()'),
|
|
- ('p::text', u'descendant-or-self::p/text()'),
|
|
- ('p ::text', u'descendant-or-self::p/descendant-or-self::text()'),
|
|
- ('#id::text', u"descendant-or-self::*[@id = 'id']/text()"),
|
|
- ('p#id::text', u"descendant-or-self::p[@id = 'id']/text()"),
|
|
- ('p#id ::text', u"descendant-or-self::p[@id = 'id']/descendant-or-self::text()"),
|
|
- ('p#id > ::text', u"descendant-or-self::p[@id = 'id']/*/text()"),
|
|
- ('p#id ~ ::text', u"descendant-or-self::p[@id = 'id']/following-sibling::*/text()"),
|
|
- ('a[href]::text', u'descendant-or-self::a[@href]/text()'),
|
|
- ('a[href] ::text', u'descendant-or-self::a[@href]/descendant-or-self::text()'),
|
|
- ('p::text, a::text', u"descendant-or-self::p/text() | descendant-or-self::a/text()"),
|
|
+ ('::text', 'descendant-or-self::text()'),
|
|
+ ('p::text', 'descendant-or-self::p/text()'),
|
|
+ ('p ::text', 'descendant-or-self::p/descendant-or-self::text()'),
|
|
+ ('#id::text', "descendant-or-self::*[@id = 'id']/text()"),
|
|
+ ('p#id::text', "descendant-or-self::p[@id = 'id']/text()"),
|
|
+ ('p#id ::text', "descendant-or-self::p[@id = 'id']/descendant-or-self::text()"),
|
|
+ ('p#id > ::text', "descendant-or-self::p[@id = 'id']/*/text()"),
|
|
+ ('p#id ~ ::text', "descendant-or-self::p[@id = 'id']/following-sibling::*/text()"),
|
|
+ ('a[href]::text', 'descendant-or-self::a[@href]/text()'),
|
|
+ ('a[href] ::text', 'descendant-or-self::a[@href]/descendant-or-self::text()'),
|
|
+ ('p::text, a::text', "descendant-or-self::p/text() | descendant-or-self::a/text()"),
|
|
]
|
|
for css, xpath in cases:
|
|
self.assertEqual(self.c2x(css), xpath, css)
|
|
@@ -122,7 +122,7 @@ class GenericTranslatorTest(TranslatorTestMixin, unittest.TestCase):
|
|
class UtilCss2XPathTest(unittest.TestCase):
|
|
def test_css2xpath(self):
|
|
from parsel import css2xpath
|
|
- expected_xpath = (u"descendant-or-self::*[@class and contains("
|
|
+ expected_xpath = ("descendant-or-self::*[@class and contains("
|
|
"concat(' ', normalize-space(@class), ' '), ' some-class ')]")
|
|
self.assertEqual(css2xpath('.some-class'), expected_xpath)
|
|
|
|
@@ -144,22 +144,22 @@ def test_selector_simple(self):
|
|
[x.extract() for x in self.sel.css('input')])
|
|
|
|
def test_text_pseudo_element(self):
|
|
- self.assertEqual(self.x('#p-b2'), [u'<b id="p-b2">guy</b>'])
|
|
- self.assertEqual(self.x('#p-b2::text'), [u'guy'])
|
|
- self.assertEqual(self.x('#p-b2 ::text'), [u'guy'])
|
|
- self.assertEqual(self.x('#paragraph::text'), [u'lorem ipsum text'])
|
|
- self.assertEqual(self.x('#paragraph ::text'), [u'lorem ipsum text', u'hi', u'there', u'guy'])
|
|
- self.assertEqual(self.x('p::text'), [u'lorem ipsum text'])
|
|
- self.assertEqual(self.x('p ::text'), [u'lorem ipsum text', u'hi', u'there', u'guy'])
|
|
+ self.assertEqual(self.x('#p-b2'), ['<b id="p-b2">guy</b>'])
|
|
+ self.assertEqual(self.x('#p-b2::text'), ['guy'])
|
|
+ self.assertEqual(self.x('#p-b2 ::text'), ['guy'])
|
|
+ self.assertEqual(self.x('#paragraph::text'), ['lorem ipsum text'])
|
|
+ self.assertEqual(self.x('#paragraph ::text'), ['lorem ipsum text', 'hi', 'there', 'guy'])
|
|
+ self.assertEqual(self.x('p::text'), ['lorem ipsum text'])
|
|
+ self.assertEqual(self.x('p ::text'), ['lorem ipsum text', 'hi', 'there', 'guy'])
|
|
|
|
def test_attribute_function(self):
|
|
- self.assertEqual(self.x('#p-b2::attr(id)'), [u'p-b2'])
|
|
- self.assertEqual(self.x('.cool-footer::attr(class)'), [u'cool-footer'])
|
|
- self.assertEqual(self.x('.cool-footer ::attr(id)'), [u'foobar-div', u'foobar-span'])
|
|
- self.assertEqual(self.x('map[name="dummymap"] ::attr(shape)'), [u'circle', u'default'])
|
|
+ self.assertEqual(self.x('#p-b2::attr(id)'), ['p-b2'])
|
|
+ self.assertEqual(self.x('.cool-footer::attr(class)'), ['cool-footer'])
|
|
+ self.assertEqual(self.x('.cool-footer ::attr(id)'), ['foobar-div', 'foobar-span'])
|
|
+ self.assertEqual(self.x('map[name="dummymap"] ::attr(shape)'), ['circle', 'default'])
|
|
|
|
def test_nested_selector(self):
|
|
self.assertEqual(self.sel.css('p').css('b::text').extract(),
|
|
- [u'hi', u'guy'])
|
|
+ ['hi', 'guy'])
|
|
self.assertEqual(self.sel.css('div').css('area:last-child').extract(),
|
|
- [u'<area shape="default" id="area-nohref">'])
|
|
+ ['<area shape="default" id="area-nohref">'])
|
|
diff --git a/tests/test_utils.py b/tests/test_utils.py
|
|
index 47d44f3..9eede53 100644
|
|
--- a/tests/test_utils.py
|
|
+++ b/tests/test_utils.py
|
|
@@ -1,29 +1,28 @@
|
|
from parsel.utils import shorten, extract_regex
|
|
|
|
from pytest import mark, raises
|
|
-import six
|
|
|
|
|
|
@mark.parametrize(
|
|
'width,expected',
|
|
(
|
|
(-1, ValueError),
|
|
- (0, u''),
|
|
- (1, u'.'),
|
|
- (2, u'..'),
|
|
- (3, u'...'),
|
|
- (4, u'f...'),
|
|
- (5, u'fo...'),
|
|
- (6, u'foobar'),
|
|
- (7, u'foobar'),
|
|
+ (0, ''),
|
|
+ (1, '.'),
|
|
+ (2, '..'),
|
|
+ (3, '...'),
|
|
+ (4, 'f...'),
|
|
+ (5, 'fo...'),
|
|
+ (6, 'foobar'),
|
|
+ (7, 'foobar'),
|
|
)
|
|
)
|
|
def test_shorten(width, expected):
|
|
- if isinstance(expected, six.string_types):
|
|
- assert shorten(u'foobar', width) == expected
|
|
+ if isinstance(expected, str):
|
|
+ assert shorten('foobar', width) == expected
|
|
else:
|
|
with raises(expected):
|
|
- shorten(u'foobar', width)
|
|
+ shorten('foobar', width)
|
|
|
|
|
|
@mark.parametrize('regex, text, replace_entities, expected', (
|
|
diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py
|
|
index cfa2579..8bcabd0 100644
|
|
--- a/tests/test_xpathfuncs.py
|
|
+++ b/tests/test_xpathfuncs.py
|
|
@@ -1,5 +1,3 @@
|
|
-# coding: utf-8
|
|
-
|
|
from parsel import Selector
|
|
from parsel.xpathfuncs import set_xpathfunc
|
|
import unittest
|
|
@@ -7,7 +5,7 @@
|
|
|
|
class XPathFuncsTestCase(unittest.TestCase):
|
|
def test_has_class_simple(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p class="foo bar-baz">First</p>
|
|
<p class="foo">Second</p>
|
|
<p class="bar">Third</p>
|
|
@@ -16,80 +14,80 @@ def test_has_class_simple(self):
|
|
sel = Selector(text=body)
|
|
self.assertEqual(
|
|
[x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
|
|
- [u'First', u'Second'])
|
|
+ ['First', 'Second'])
|
|
self.assertEqual(
|
|
[x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')],
|
|
- [u'Third'])
|
|
+ ['Third'])
|
|
self.assertEqual(
|
|
[x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')],
|
|
[])
|
|
self.assertEqual(
|
|
[x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')],
|
|
- [u'First'])
|
|
+ ['First'])
|
|
|
|
def test_has_class_error_no_args(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
- self.assertRaisesRegexp(
|
|
+ self.assertRaisesRegex(
|
|
ValueError, 'has-class must have at least 1 argument',
|
|
sel.xpath, 'has-class()')
|
|
|
|
def test_has_class_error_invalid_arg_type(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
- self.assertRaisesRegexp(
|
|
+ self.assertRaisesRegex(
|
|
ValueError, 'has-class arguments must be strings',
|
|
sel.xpath, 'has-class(.)')
|
|
|
|
def test_has_class_error_invalid_unicode(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
- self.assertRaisesRegexp(
|
|
+ self.assertRaisesRegex(
|
|
ValueError, 'All strings must be XML compatible',
|
|
- sel.xpath, u'has-class("héllö")'.encode('utf-8'))
|
|
+ sel.xpath, 'has-class("héllö")'.encode('utf-8'))
|
|
|
|
def test_has_class_unicode(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="fóó">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
self.assertEqual(
|
|
- [x.extract() for x in sel.xpath(u'//p[has-class("fóó")]/text()')],
|
|
- [u'First'])
|
|
+ [x.extract() for x in sel.xpath('//p[has-class("fóó")]/text()')],
|
|
+ ['First'])
|
|
|
|
def test_has_class_uppercase(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
self.assertEqual(
|
|
[x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
|
|
- [u'First'])
|
|
+ ['First'])
|
|
|
|
def test_has_class_newline(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo
|
|
bar">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
self.assertEqual(
|
|
- [x.extract() for x in sel.xpath(u'//p[has-class("foo")]/text()')],
|
|
- [u'First'])
|
|
+ [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
|
|
+ ['First'])
|
|
|
|
def test_has_class_tab(self):
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo\tbar">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
self.assertEqual(
|
|
- [x.extract() for x in sel.xpath(u'//p[has-class("foo")]/text()')],
|
|
- [u'First'])
|
|
+ [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
|
|
+ ['First'])
|
|
|
|
def test_set_xpathfunc(self):
|
|
|
|
@@ -98,11 +96,11 @@ def myfunc(ctx):
|
|
|
|
myfunc.call_count = 0
|
|
|
|
- body = u"""
|
|
+ body = """
|
|
<p CLASS="foo">First</p>
|
|
"""
|
|
sel = Selector(text=body)
|
|
- self.assertRaisesRegexp(
|
|
+ self.assertRaisesRegex(
|
|
ValueError, 'Unregistered function in myfunc',
|
|
sel.xpath, 'myfunc()')
|
|
|
|
@@ -111,6 +109,6 @@ def myfunc(ctx):
|
|
self.assertEqual(myfunc.call_count, 1)
|
|
|
|
set_xpathfunc('myfunc', None)
|
|
- self.assertRaisesRegexp(
|
|
+ self.assertRaisesRegex(
|
|
ValueError, 'Unregistered function in myfunc',
|
|
sel.xpath, 'myfunc()')
|
|
|