-
- ...
- first item -@@ -487,7 +485,7 @@ extracting text elements for example. - Example extracting microdata (sample content taken from http://schema.org/Product) - with groups of itemscopes and corresponding itemprops:: - -- >>> doc = u""" -+ >>> doc = """ - ...
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 1
- 2
- 1
- 2
- 1
- 2
- 1
- 2
- 3
- 1
- 2
- 3
- 2 ') -- self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), u'2') -+ self.assertEqual(sel.xpath('//ul/li[position()>1]')[0].get(), '
- 2 ') -+ self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), '2') - - def test_selector_getall_alias(self): - """Test if get() returns extracted value on a Selector""" -- body = u'
- 1
- 2
- 3
- 1
- 2
- 3
- 2 ']) -- self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), [u'2']) -+ self.assertListEqual(sel.xpath('//ul/li[position()>1]')[0].getall(), ['
- 2 ']) -+ self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), ['2']) - - def test_selectorlist_get_alias(self): - """Test if get() returns first element for a selection call""" -- body = u'
- 1
- 2
- 3
- 1
- 2
- 3
- 1 ') -- self.assertEqual(sel.xpath('//ul/li/text()').get(), u'1') -+ self.assertEqual(sel.xpath('//ul/li').get(), '
- 1 ') -+ self.assertEqual(sel.xpath('//ul/li/text()').get(), '1') - - def test_re_first(self): - """Test if re_first() returns first matched element""" -- body = u'
- 1
- 2
- 1
- 2
- 1
- 2
- 1
- 2
- one
- two -@@ -322,7 +322,7 @@ def test_nested_selectors(self): - - def test_selectorlist_getall_alias(self): - """Nested selector tests using getall()""" -- body = u""" -+ body = """ -
- one
- two -@@ -346,20 +346,20 @@ def test_selectorlist_getall_alias(self): - self.assertEqual(divtwo.xpath("./li").getall(), []) - - def test_mixed_nested_selectors(self): -- body = u''' -+ body = ''' -
- Name: John -
- Age: 10 -@@ -519,23 +519,23 @@ def test_re(self): - ["10", "20"]) - - # Test named group, hit and miss -- x = self.sscls(text=u'foobar') -+ x = self.sscls(text='foobar') - self.assertEqual(x.re('(?P
- one
- two -@@ -723,18 +722,18 @@ class SmartStringsSelector(Selector): - # only when smart_strings are on - x = self.sscls(text=body) - li_text = x.xpath('//li/text()') -- self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), li_text))) -+ self.assertFalse(any([hasattr(e.root, 'getparent') for e in li_text])) - div_class = x.xpath('//div/@class') -- self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), div_class))) -+ self.assertFalse(any([hasattr(e.root, 'getparent') for e in div_class])) - - x = SmartStringsSelector(text=body) - li_text = x.xpath('//li/text()') -- self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), li_text))) -+ self.assertTrue(all([hasattr(e.root, 'getparent') for e in li_text])) - div_class = x.xpath('//div/@class') -- self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), div_class))) -+ self.assertTrue(all([hasattr(e.root, 'getparent') for e in div_class])) - - def test_xml_entity_expansion(self): -- malicious_xml = u''\ -+ malicious_xml = ''\ - ' ]>
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
- 1
- 2
- 3
-@@ -591,7 +589,7 @@ returns ``True`` for nodes that have all of the specified HTML classes::
- ... Fourth
- ... """) - ... -- >>> sel = Selector(u""" -+ >>> sel = Selector(""" - ... - ...Second
- ... -@@ -1111,7 +1109,7 @@ Named variables can be useful when strings need to be escaped for single - or double quotes characters. The example below would be a bit tricky to - get right (or legible) without a variable reference:: - -- >>> html = u''' -+ >>> html = ''' - ... - ...He said: "I don't know why, but I like mixing single and double quotes!"
- ... -diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py -index 747e808..3881736 100644 ---- a/parsel/csstranslator.py -+++ b/parsel/csstranslator.py -@@ -1,7 +1,4 @@ --try: -- from functools import lru_cache --except ImportError: -- from functools32 import lru_cache -+from functools import lru_cache - - from cssselect import GenericTranslator as OriginalGenericTranslator - from cssselect import HTMLTranslator as OriginalHTMLTranslator -@@ -23,7 +20,7 @@ def from_xpath(cls, xpath, textnode=False, attribute=None): - return x - - def __str__(self): -- path = super(XPathExpr, self).__str__() -+ path = super().__str__() - if self.textnode: - if path == '*': - path = 'text()' -@@ -40,20 +37,20 @@ def __str__(self): - return path - - def join(self, combiner, other): -- super(XPathExpr, self).join(combiner, other) -+ super().join(combiner, other) - self.textnode = other.textnode - self.attribute = other.attribute - return self - - --class TranslatorMixin(object): -+class TranslatorMixin: - """This mixin adds support to CSS pseudo elements via dynamic dispatch. - - Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``. - """ - - def xpath_element(self, selector): -- xpath = super(TranslatorMixin, self).xpath_element(selector) -+ xpath = super().xpath_element(selector) - return XPathExpr.from_xpath(xpath) - - def xpath_pseudo_element(self, xpath, pseudo_element): -@@ -98,13 +95,13 @@ def xpath_text_simple_pseudo_element(self, xpath): - class GenericTranslator(TranslatorMixin, OriginalGenericTranslator): - @lru_cache(maxsize=256) - def css_to_xpath(self, css, prefix='descendant-or-self::'): -- return super(GenericTranslator, self).css_to_xpath(css, prefix) -+ return super().css_to_xpath(css, prefix) - - - class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator): - @lru_cache(maxsize=256) - def css_to_xpath(self, css, prefix='descendant-or-self::'): -- return super(HTMLTranslator, self).css_to_xpath(css, prefix) -+ return super().css_to_xpath(css, prefix) - - - _translator = HTMLTranslator() -diff --git a/parsel/selector.py b/parsel/selector.py -index 504a4fe..b644e82 100644 ---- a/parsel/selector.py -+++ b/parsel/selector.py -@@ -2,9 +2,6 @@ - XPath selectors based on lxml - """ - --import sys -- --import six - from lxml import etree, html - - from .utils import flatten, iflatten, extract_regex, shorten -@@ -22,7 +19,7 @@ class CannotRemoveElementWithoutParent(Exception): - class SafeXMLParser(etree.XMLParser): - def __init__(self, *args, **kwargs): - kwargs.setdefault('resolve_entities', False) -- super(SafeXMLParser, self).__init__(*args, **kwargs) -+ super().__init__(*args, **kwargs) - - - _ctgroup = { -@@ -61,13 +58,8 @@ class SelectorList(list): - class, which provides a few additional methods. - """ - -- # __getslice__ is deprecated but `list` builtin implements it only in Py2 -- def __getslice__(self, i, j): -- o = super(SelectorList, self).__getslice__(i, j) -- return self.__class__(o) -- - def __getitem__(self, pos): -- o = super(SelectorList, self).__getitem__(pos) -+ o = super().__getitem__(pos) - return self.__class__(o) if isinstance(pos, slice) else o - - def __getstate__(self): -@@ -164,7 +156,7 @@ def remove(self): - x.remove() - - --class Selector(object): -+class Selector: - """ - :class:`Selector` allows you to select parts of an XML or HTML text using CSS - or XPath expressions and extract data from it. -@@ -204,9 +196,10 @@ def __init__(self, text=None, type=None, namespaces=None, root=None, - self._tostring_method = _ctgroup[st]['_tostring_method'] - - if text is not None: -- if not isinstance(text, six.text_type): -- msg = "text argument should be of type %s, got %s" % ( -- six.text_type, text.__class__) -+ if not isinstance(text, str): -+ msg = "text argument should be of type str, got %s" % ( -+ text.__class__ -+ ) - raise TypeError(msg) - root = self._get_root(text, base_url) - elif root is None: -@@ -255,9 +248,7 @@ def xpath(self, query, namespaces=None, **kwargs): - smart_strings=self._lxml_smart_strings, - **kwargs) - except etree.XPathError as exc: -- msg = u"XPath error: %s in %s" % (exc, query) -- msg = msg if six.PY3 else msg.encode('unicode_escape') -- six.reraise(ValueError, ValueError(msg), sys.exc_info()[2]) -+ raise ValueError("XPath error: %s in %s" % (exc, query)) - - if type(result) is not list: - result = [result] -@@ -324,11 +315,11 @@ def get(self): - with_tail=False) - except (AttributeError, TypeError): - if self.root is True: -- return u'1' -+ return '1' - elif self.root is False: -- return u'0' -+ return '0' - else: -- return six.text_type(self.root) -+ return str(self.root) - extract = get - - def getall(self): -@@ -354,7 +345,7 @@ def remove_namespaces(self): - if el.tag.startswith('{'): - el.tag = el.tag.split('}', 1)[1] - # loop on element attributes also -- for an in el.attrib.keys(): -+ for an in el.attrib: - if an.startswith('{'): - el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an) - # remove namespace declarations -diff --git a/parsel/utils.py b/parsel/utils.py -index 6914362..6aeff6f 100644 ---- a/parsel/utils.py -+++ b/parsel/utils.py -@@ -1,5 +1,4 @@ - import re --import six - from w3lib.html import replace_entities as w3lib_replace_entities - - -@@ -50,10 +49,10 @@ def _is_listlike(x): - True - >>> _is_listlike((x for x in range(3))) - True -- >>> _is_listlike(six.moves.xrange(5)) -+ >>> _is_listlike(range(5)) - True - """ -- return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes)) -+ return hasattr(x, "__iter__") and not isinstance(x, (str, bytes)) - - - def extract_regex(regex, text, replace_entities=True): -@@ -62,7 +61,7 @@ def extract_regex(regex, text, replace_entities=True): - * if the regex contains multiple numbered groups, all those will be returned (flattened) - * if the regex doesn't contain any group the entire regex matching is returned - """ -- if isinstance(regex, six.string_types): -+ if isinstance(regex, str): - regex = re.compile(regex, re.UNICODE) - - if 'extract' in regex.groupindex: -diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py -index 95b07ba..ceb8eaf 100644 ---- a/parsel/xpathfuncs.py -+++ b/parsel/xpathfuncs.py -@@ -1,8 +1,6 @@ - import re - from lxml import etree - --from six import string_types -- - from w3lib.html import HTML5_WHITESPACE - - regex = '[{}]+'.format(HTML5_WHITESPACE) -@@ -45,7 +43,7 @@ def has_class(context, *classes): - raise ValueError( - 'XPath error: has-class must have at least 1 argument') - for c in classes: -- if not isinstance(c, string_types): -+ if not isinstance(c, str): - raise ValueError( - 'XPath error: has-class arguments must be strings') - context.eval_context['args_checked'] = True -diff --git a/setup.py b/setup.py -index d14ad0e..ade049f 100644 ---- a/setup.py -+++ b/setup.py -@@ -1,9 +1,5 @@ - #!/usr/bin/env python --# -*- coding: utf-8 -*- - --import sys -- --from pkg_resources import parse_version - from setuptools import setup, __version__ as setuptools_version - - -@@ -13,32 +9,6 @@ - with open('NEWS') as history_file: - history = history_file.read().replace('.. :changelog:', '') - --test_requirements = [ --] -- --def has_environment_marker_platform_impl_support(): -- """Code extracted from 'pytest/setup.py' -- https://github.com/pytest-dev/pytest/blob/7538680c/setup.py#L31 -- The first known release to support environment marker with range operators -- it is 18.5, see: -- https://setuptools.readthedocs.io/en/latest/history.html#id235 -- """ -- return parse_version(setuptools_version) >= parse_version('18.5') -- --install_requires = [ -- 'w3lib>=1.19.0', -- 'lxml', -- 'six>=1.6.0', -- 'cssselect>=0.9' --] --extras_require = {} -- --if not has_environment_marker_platform_impl_support(): -- if sys.version_info[0:2] < (3, 0): -- install_requires.append("functools32") --else: -- extras_require[":python_version<'3.0'"] = ["functools32"] -- - setup( - name='parsel', - version='1.6.0', -@@ -50,11 +20,16 @@ def has_environment_marker_platform_impl_support(): - packages=[ - 'parsel', - ], -- package_dir={'parsel': -- 'parsel'}, -+ package_dir={ -+ 'parsel': 'parsel', -+ }, - include_package_data=True, -- install_requires=install_requires, -- extras_require=extras_require, -+ install_requires=[ -+ 'cssselect>=0.9', -+ 'lxml', -+ 'w3lib>=1.19.0', -+ ], -+ python_requires='>=3.6', - license="BSD", - zip_safe=False, - keywords='parsel', -@@ -66,13 +41,11 @@ def has_environment_marker_platform_impl_support(): - 'Topic :: Text Processing :: Markup', - 'Topic :: Text Processing :: Markup :: HTML', - 'Topic :: Text Processing :: Markup :: XML', -- 'Programming Language :: Python :: 2', -- 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', -- 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', -+ 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - ], -diff --git a/tests/test_selector.py b/tests/test_selector.py -index a5c61f6..f5c60ae 100644 ---- a/tests/test_selector.py -+++ b/tests/test_selector.py -@@ -1,7 +1,5 @@ --# -*- coding: utf-8 -*- - import re - import weakref --import six - import unittest - import pickle - -@@ -17,11 +15,11 @@ class SelectorTestCase(unittest.TestCase): - sscls = Selector - - def test_pickle_selector(self): -- sel = self.sscls(text=u'some text
') -+ sel = self.sscls(text='some text
') - self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel) - - def test_pickle_selector_list(self): -- sel = self.sscls(text=u'I'm mixing single and -+ body = """
I'm mixing single and - "double quotes" and I don't care :)
""" - sel = self.sscls(text=body) - -@@ -95,7 +93,7 @@ def test_simple_selection_with_variables_escape_friendly(self): - - # with XPath variables, escaping is done for you - self.assertEqual([x.extract() for x in sel.xpath("//input[@value=$text]/@name", text=t)], -- [u'a']) -+ ['a']) - lt = """I'm mixing single and "double quotes" and I don't care :)""" - # the following gives you something like - # ValueError: XPath error: Invalid predicate in //p[normalize-space()='I'm mixing single and "double quotes" and I don't care :)']//@name -@@ -103,10 +101,10 @@ def test_simple_selection_with_variables_escape_friendly(self): - - self.assertEqual([x.extract() for x in sel.xpath("//p[normalize-space()=$lng]//@name", - lng=lt)], -- [u'a']) -+ ['a']) - - def test_accessing_attributes(self): -- body = u""" -+ body = """ - - --
-@@ -134,12 +132,10 @@ def test_accessing_attributes(self):
- [e.attrib for e in sel.css('li')])
-
- def test_representation_slice(self):
-- body = u"".format(50 * 'b')
-+ body = "".format(50 * 'b')
- sel = self.sscls(text=body)
-
- representation = "
test
' -+ text = '
test
' - assert isinstance(self.sscls(text=text).xpath("//p")[0], self.sscls) - assert isinstance(self.sscls(text=text).css("p")[0], self.sscls) - - def test_boolean_result(self): -- body = u"
" -+ body = "" - xs = self.sscls(text=body) -- self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), [u'1']) -- self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), [u'0']) -+ self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ['1']) -+ self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), ['0']) - - def test_differences_parsing_xml_vs_html(self): - """Test that XML and HTML Selector's behave differently""" - # some text which is parsed differently by XML and HTML flavors -- text = u'
Hello

Hello

Hello

Hello

Hello

Hello
1
2
3
1
2
3
3
']) -- self.assertEqual(hs.css('p')[1:3].extract(), [u'2
', u'3
']) -+ self.assertEqual(hs.css('p')[2:3].extract(), ['3
']) -+ self.assertEqual(hs.css('p')[1:3].extract(), ['2
', '3
']) - - def test_nested_selectors(self): - """Nested selector tests""" -- body = u""" -+ body = """ --
-
-
-
text
foo-
-
an Jos\ufffd de
' -- self.assertEqual([u'an Jos\ufffd de'], -+ text = 'an Jos\\ufffd de
' -+ self.assertEqual(['an Jos\\ufffd de'], - self.sscls(text).xpath('//text()').extract()) - - def test_select_on_unevaluable_nodes(self): -- r = self.sscls(text=u'some text') -+ r = self.sscls(text='some text') - # Text node - x1 = r.xpath('//text()') -- self.assertEqual(x1.extract(), [u'some text']) -+ self.assertEqual(x1.extract(), ['some text']) - self.assertEqual(x1.xpath('.//b').extract(), []) - # Tag attribute - x1 = r.xpath('//span/@class') -- self.assertEqual(x1.extract(), [u'big']) -+ self.assertEqual(x1.extract(), ['big']) - self.assertEqual(x1.xpath('.//text()').extract(), []) - - def test_select_on_text_nodes(self): -- r = self.sscls(text=u'-
-
Grainy
' -- self.assertEqual(u'Grainy
', -+ text = '\x00Grainy
' -+ self.assertEqual('Grainy
', - self.sscls(text).extract()) - - def test_remove_selector_list(self): -- sel = self.sscls(text=u'