forked from pool/python-w3lib
Accepting request 1011020 from home:pgajdos:python
- do not require six
- added patches
c16d7bac3a
+ python-w3lib-no-six.patch
OBS-URL: https://build.opensuse.org/request/show/1011020
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-w3lib?expand=0&rev=20
This commit is contained in:
641
python-w3lib-no-six.patch
Normal file
641
python-w3lib-no-six.patch
Normal file
@@ -0,0 +1,641 @@
|
|||||||
|
Index: w3lib-1.22.0/setup.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/setup.py
|
||||||
|
+++ w3lib-1.22.0/setup.py
|
||||||
|
@@ -29,5 +29,4 @@ setup(
|
||||||
|
'Programming Language :: Python :: Implementation :: PyPy',
|
||||||
|
'Topic :: Internet :: WWW/HTTP',
|
||||||
|
],
|
||||||
|
- install_requires=['six >= 1.4.1'],
|
||||||
|
)
|
||||||
|
Index: w3lib-1.22.0/tests/test_encoding.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/tests/test_encoding.py
|
||||||
|
+++ w3lib-1.22.0/tests/test_encoding.py
|
||||||
|
@@ -1,7 +1,14 @@
|
||||||
|
-import unittest, codecs
|
||||||
|
-import six
|
||||||
|
-from w3lib.encoding import (html_body_declared_encoding, read_bom, to_unicode,
|
||||||
|
- http_content_type_encoding, resolve_encoding, html_to_unicode)
|
||||||
|
+import codecs
|
||||||
|
+import unittest
|
||||||
|
+
|
||||||
|
+from w3lib.encoding import (
|
||||||
|
+ html_body_declared_encoding,
|
||||||
|
+ http_content_type_encoding,
|
||||||
|
+ html_to_unicode,
|
||||||
|
+ read_bom,
|
||||||
|
+ resolve_encoding,
|
||||||
|
+ to_unicode,
|
||||||
|
+)
|
||||||
|
|
||||||
|
class RequestEncodingTests(unittest.TestCase):
|
||||||
|
utf8_fragments = [
|
||||||
|
@@ -107,18 +114,18 @@ class HtmlConversionTests(unittest.TestC
|
||||||
|
original_string = unicode_string.encode('cp1251')
|
||||||
|
encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string)
|
||||||
|
# check body_as_unicode
|
||||||
|
- self.assertTrue(isinstance(body_unicode, six.text_type))
|
||||||
|
+ self.assertTrue(isinstance(body_unicode, str))
|
||||||
|
self.assertEqual(body_unicode, unicode_string)
|
||||||
|
|
||||||
|
def _assert_encoding(self, content_type, body, expected_encoding,
|
||||||
|
expected_unicode):
|
||||||
|
- assert not isinstance(body, six.text_type)
|
||||||
|
+ assert not isinstance(body, str)
|
||||||
|
encoding, body_unicode = html_to_unicode(ct(content_type), body)
|
||||||
|
- self.assertTrue(isinstance(body_unicode, six.text_type))
|
||||||
|
+ self.assertTrue(isinstance(body_unicode, str))
|
||||||
|
self.assertEqual(norm_encoding(encoding),
|
||||||
|
norm_encoding(expected_encoding))
|
||||||
|
|
||||||
|
- if isinstance(expected_unicode, six.string_types):
|
||||||
|
+ if isinstance(expected_unicode, str):
|
||||||
|
self.assertEqual(body_unicode, expected_unicode)
|
||||||
|
else:
|
||||||
|
self.assertTrue(
|
||||||
|
@@ -177,9 +184,9 @@ class HtmlConversionTests(unittest.TestC
|
||||||
|
|
||||||
|
def _assert_encoding_detected(self, content_type, expected_encoding, body,
|
||||||
|
**kwargs):
|
||||||
|
- assert not isinstance(body, six.text_type)
|
||||||
|
+ assert not isinstance(body, str)
|
||||||
|
encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
|
||||||
|
- self.assertTrue(isinstance(body_unicode, six.text_type))
|
||||||
|
+ self.assertTrue(isinstance(body_unicode, str))
|
||||||
|
self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
|
||||||
|
|
||||||
|
def test_BOM(self):
|
||||||
|
Index: w3lib-1.22.0/tests/test_html.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/tests/test_html.py
|
||||||
|
+++ w3lib-1.22.0/tests/test_html.py
|
||||||
|
@@ -1,18 +1,25 @@
|
||||||
|
-# -*- coding: utf-8 -*-
|
||||||
|
import unittest
|
||||||
|
-import six
|
||||||
|
-from w3lib.html import (replace_entities, replace_tags, remove_comments,
|
||||||
|
- remove_tags_with_content, replace_escape_chars, remove_tags, unquote_markup,
|
||||||
|
- get_base_url, get_meta_refresh)
|
||||||
|
+
|
||||||
|
+from w3lib.html import (
|
||||||
|
+ get_base_url,
|
||||||
|
+ get_meta_refresh,
|
||||||
|
+ remove_comments,
|
||||||
|
+ remove_tags,
|
||||||
|
+ remove_tags_with_content,
|
||||||
|
+ replace_entities,
|
||||||
|
+ replace_escape_chars,
|
||||||
|
+ replace_tags,
|
||||||
|
+ unquote_markup,
|
||||||
|
+)
|
||||||
|
|
||||||
|
|
||||||
|
class RemoveEntitiesTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return uncode
|
||||||
|
- assert isinstance(replace_entities(b'no entities'), six.text_type)
|
||||||
|
- assert isinstance(replace_entities(b'Price: £100!'), six.text_type)
|
||||||
|
- assert isinstance(replace_entities(u'no entities'), six.text_type)
|
||||||
|
- assert isinstance(replace_entities(u'Price: £100!'), six.text_type)
|
||||||
|
+ assert isinstance(replace_entities(b'no entities'), str)
|
||||||
|
+ assert isinstance(replace_entities(b'Price: £100!'), str)
|
||||||
|
+ assert isinstance(replace_entities(u'no entities'), str)
|
||||||
|
+ assert isinstance(replace_entities(u'Price: £100!'), str)
|
||||||
|
|
||||||
|
def test_regular(self):
|
||||||
|
# regular conversions
|
||||||
|
@@ -71,8 +78,8 @@ class RemoveEntitiesTest(unittest.TestCa
|
||||||
|
class ReplaceTagsTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return uncode
|
||||||
|
- assert isinstance(replace_tags(b'no entities'), six.text_type)
|
||||||
|
- assert isinstance(replace_tags('no entities'), six.text_type)
|
||||||
|
+ assert isinstance(replace_tags(b'no entities'), str)
|
||||||
|
+ assert isinstance(replace_tags('no entities'), str)
|
||||||
|
|
||||||
|
def test_replace_tags(self):
|
||||||
|
self.assertEqual(replace_tags(u'This text contains <a>some tag</a>'),
|
||||||
|
@@ -88,10 +95,10 @@ class ReplaceTagsTest(unittest.TestCase)
|
||||||
|
class RemoveCommentsTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return unicode
|
||||||
|
- assert isinstance(remove_comments(b'without comments'), six.text_type)
|
||||||
|
- assert isinstance(remove_comments(b'<!-- with comments -->'), six.text_type)
|
||||||
|
- assert isinstance(remove_comments(u'without comments'), six.text_type)
|
||||||
|
- assert isinstance(remove_comments(u'<!-- with comments -->'), six.text_type)
|
||||||
|
+ assert isinstance(remove_comments(b'without comments'), str)
|
||||||
|
+ assert isinstance(remove_comments(b'<!-- with comments -->'), str)
|
||||||
|
+ assert isinstance(remove_comments(u'without comments'), str)
|
||||||
|
+ assert isinstance(remove_comments(u'<!-- with comments -->'), str)
|
||||||
|
|
||||||
|
def test_no_comments(self):
|
||||||
|
# text without comments
|
||||||
|
@@ -112,16 +119,16 @@ class RemoveCommentsTest(unittest.TestCa
|
||||||
|
class RemoveTagsTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return unicode
|
||||||
|
- assert isinstance(remove_tags(b'no tags'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(b'no tags', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(b'<p>one tag</p>'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(b'<a>link</a>', which_ones=('b',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(u'no tags'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(u'no tags', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(u'<p>one tag</p>'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), six.text_type)
|
||||||
|
- assert isinstance(remove_tags(u'<a>link</a>', which_ones=('b',)), six.text_type)
|
||||||
|
+ assert isinstance(remove_tags(b'no tags'), str)
|
||||||
|
+ assert isinstance(remove_tags(b'no tags', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags(b'<p>one tag</p>'), str)
|
||||||
|
+ assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), str)
|
||||||
|
+ assert isinstance(remove_tags(b'<a>link</a>', which_ones=('b',)), str)
|
||||||
|
+ assert isinstance(remove_tags(u'no tags'), str)
|
||||||
|
+ assert isinstance(remove_tags(u'no tags', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags(u'<p>one tag</p>'), str)
|
||||||
|
+ assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), str)
|
||||||
|
+ assert isinstance(remove_tags(u'<a>link</a>', which_ones=('b',)), str)
|
||||||
|
|
||||||
|
def test_remove_tags_without_tags(self):
|
||||||
|
# text without tags
|
||||||
|
@@ -160,14 +167,14 @@ class RemoveTagsTest(unittest.TestCase):
|
||||||
|
class RemoveTagsWithContentTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return unicode
|
||||||
|
- assert isinstance(remove_tags_with_content(b'no tags'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(b'<a>link</a>', which_ones=('b',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(u'no tags'), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), six.text_type)
|
||||||
|
- assert isinstance(remove_tags_with_content(u'<a>link</a>', which_ones=('b',)), six.text_type)
|
||||||
|
+ assert isinstance(remove_tags_with_content(b'no tags'), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(b'<a>link</a>', which_ones=('b',)), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(u'no tags'), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), str)
|
||||||
|
+ assert isinstance(remove_tags_with_content(u'<a>link</a>', which_ones=('b',)), str)
|
||||||
|
|
||||||
|
def test_without_tags(self):
|
||||||
|
# text without tags
|
||||||
|
@@ -194,13 +201,13 @@ class RemoveTagsWithContentTest(unittest
|
||||||
|
class ReplaceEscapeCharsTest(unittest.TestCase):
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return unicode
|
||||||
|
- assert isinstance(replace_escape_chars(b'no ec'), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(u'no ec'), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), six.text_type)
|
||||||
|
- assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), six.text_type)
|
||||||
|
+ assert isinstance(replace_escape_chars(b'no ec'), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(u'no ec'), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), str)
|
||||||
|
+ assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), str)
|
||||||
|
|
||||||
|
def test_without_escape_chars(self):
|
||||||
|
# text without escape chars
|
||||||
|
@@ -226,8 +233,8 @@ class UnquoteMarkupTest(unittest.TestCas
|
||||||
|
|
||||||
|
def test_returns_unicode(self):
|
||||||
|
# make sure it always return unicode
|
||||||
|
- assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), six.text_type)
|
||||||
|
- assert isinstance(unquote_markup(self.sample_txt2), six.text_type)
|
||||||
|
+ assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), str)
|
||||||
|
+ assert isinstance(unquote_markup(self.sample_txt2), str)
|
||||||
|
|
||||||
|
def test_unquote_markup(self):
|
||||||
|
self.assertEqual(unquote_markup(self.sample_txt1), u"""<node1>hi, this is sample text with entities: & \xa9
|
||||||
|
Index: w3lib-1.22.0/tests/test_url.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/tests/test_url.py
|
||||||
|
+++ w3lib-1.22.0/tests/test_url.py
|
||||||
|
@@ -1,15 +1,25 @@
|
||||||
|
-# -*- coding: utf-8 -*-
|
||||||
|
-from __future__ import absolute_import
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
+from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
-from six.moves.urllib.parse import urlparse
|
||||||
|
|
||||||
|
-from w3lib.url import (is_url, safe_url_string, safe_download_url,
|
||||||
|
- url_query_parameter, add_or_replace_parameter, url_query_cleaner,
|
||||||
|
- file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri,
|
||||||
|
- urljoin_rfc, canonicalize_url, parse_url, add_or_replace_parameters)
|
||||||
|
+from w3lib.url import (
|
||||||
|
+ add_or_replace_parameter,
|
||||||
|
+ add_or_replace_parameters,
|
||||||
|
+ any_to_uri,
|
||||||
|
+ canonicalize_url,
|
||||||
|
+ file_uri_to_path,
|
||||||
|
+ is_url,
|
||||||
|
+ parse_data_uri,
|
||||||
|
+ parse_url,
|
||||||
|
+ path_to_file_uri,
|
||||||
|
+ safe_download_url,
|
||||||
|
+ safe_url_string,
|
||||||
|
+ url_query_parameter,
|
||||||
|
+ url_query_cleaner,
|
||||||
|
+ urljoin_rfc,
|
||||||
|
+)
|
||||||
|
|
||||||
|
|
||||||
|
class UrlTests(unittest.TestCase):
|
||||||
|
Index: w3lib-1.22.0/w3lib/form.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/w3lib/form.py
|
||||||
|
+++ w3lib-1.22.0/w3lib/form.py
|
||||||
|
@@ -1,9 +1,6 @@
|
||||||
|
import warnings
|
||||||
|
-import six
|
||||||
|
-if six.PY2:
|
||||||
|
- from cStringIO import StringIO as BytesIO
|
||||||
|
-else:
|
||||||
|
- from io import BytesIO
|
||||||
|
+from io import BytesIO
|
||||||
|
+
|
||||||
|
from w3lib.util import unicode_to_str
|
||||||
|
|
||||||
|
|
||||||
|
Index: w3lib-1.22.0/w3lib/html.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/w3lib/html.py
|
||||||
|
+++ w3lib-1.22.0/w3lib/html.py
|
||||||
|
@@ -5,16 +5,16 @@ Functions for dealing with markup text
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
import re
|
||||||
|
-import six
|
||||||
|
-from six import moves
|
||||||
|
+from html.entities import name2codepoint
|
||||||
|
+from urllib.parse import urljoin
|
||||||
|
|
||||||
|
from w3lib.util import to_bytes, to_unicode
|
||||||
|
from w3lib.url import safe_url_string
|
||||||
|
|
||||||
|
_ent_re = re.compile(r'&((?P<named>[a-z\d]+)|#(?P<dec>\d+)|#x(?P<hex>[a-f\d]+))(?P<semicolon>;?)', re.IGNORECASE)
|
||||||
|
_tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL)
|
||||||
|
-_baseurl_re = re.compile(six.u(r'<base\s[^>]*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']'), re.I)
|
||||||
|
-_meta_refresh_re = re.compile(six.u(r'<meta\s[^>]*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P<quote>["\'])(?P<int>(\d*\.)?\d+)\s*;\s*url=\s*(?P<url>.*?)(?P=quote)'), re.DOTALL | re.IGNORECASE)
|
||||||
|
+_baseurl_re = re.compile(r'<base\s[^>]*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']', re.I)
|
||||||
|
+_meta_refresh_re = re.compile(r'<meta\s[^>]*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P<quote>["\'])(?P<int>(\d*\.)?\d+)\s*;\s*url=\s*(?P<url>.*?)(?P=quote)', re.DOTALL | re.IGNORECASE)
|
||||||
|
_cdata_re = re.compile(r'((?P<cdata_s><!\[CDATA\[)(?P<cdata_d>.*?)(?P<cdata_e>\]\]>))', re.DOTALL)
|
||||||
|
|
||||||
|
HTML5_WHITESPACE = ' \t\n\r\x0c'
|
||||||
|
@@ -77,8 +77,10 @@ def replace_entities(text, keep=(), remo
|
||||||
|
if entity_name.lower() in keep:
|
||||||
|
return m.group(0)
|
||||||
|
else:
|
||||||
|
- number = (moves.html_entities.name2codepoint.get(entity_name) or
|
||||||
|
- moves.html_entities.name2codepoint.get(entity_name.lower()))
|
||||||
|
+ number = (
|
||||||
|
+ name2codepoint.get(entity_name)
|
||||||
|
+ or name2codepoint.get(entity_name.lower())
|
||||||
|
+ )
|
||||||
|
if number is not None:
|
||||||
|
# Numeric character references in the 80-9F range are typically
|
||||||
|
# interpreted by browsers as representing the characters mapped
|
||||||
|
@@ -86,9 +88,9 @@ def replace_entities(text, keep=(), remo
|
||||||
|
# see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
|
||||||
|
try:
|
||||||
|
if 0x80 <= number <= 0x9f:
|
||||||
|
- return six.int2byte(number).decode('cp1252')
|
||||||
|
+ return bytes((number,)).decode('cp1252')
|
||||||
|
else:
|
||||||
|
- return six.unichr(number)
|
||||||
|
+ return chr(number)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@@ -265,7 +267,7 @@ def unquote_markup(text, keep=(), remove
|
||||||
|
text = to_unicode(text, encoding)
|
||||||
|
ret_text = u''
|
||||||
|
for fragment in _get_fragments(text, _cdata_re):
|
||||||
|
- if isinstance(fragment, six.string_types):
|
||||||
|
+ if isinstance(fragment, str):
|
||||||
|
# it's not a CDATA (so we try to remove its entities)
|
||||||
|
ret_text += replace_entities(fragment, keep=keep, remove_illegal=remove_illegal)
|
||||||
|
else:
|
||||||
|
@@ -284,7 +286,7 @@ def get_base_url(text, baseurl='', encod
|
||||||
|
text = to_unicode(text, encoding)
|
||||||
|
m = _baseurl_re.search(text)
|
||||||
|
if m:
|
||||||
|
- return moves.urllib.parse.urljoin(
|
||||||
|
+ return urljoin(
|
||||||
|
safe_url_string(baseurl),
|
||||||
|
safe_url_string(m.group(1), encoding=encoding)
|
||||||
|
)
|
||||||
|
@@ -301,8 +303,6 @@ def get_meta_refresh(text, baseurl='', e
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
- if six.PY2:
|
||||||
|
- baseurl = to_bytes(baseurl, encoding)
|
||||||
|
try:
|
||||||
|
text = to_unicode(text, encoding)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
@@ -314,7 +314,7 @@ def get_meta_refresh(text, baseurl='', e
|
||||||
|
if m:
|
||||||
|
interval = float(m.group('int'))
|
||||||
|
url = safe_url_string(m.group('url').strip(' "\''), encoding)
|
||||||
|
- url = moves.urllib.parse.urljoin(baseurl, url)
|
||||||
|
+ url = urljoin(baseurl, url)
|
||||||
|
return interval, url
|
||||||
|
else:
|
||||||
|
return None, None
|
||||||
|
Index: w3lib-1.22.0/w3lib/url.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/w3lib/url.py
|
||||||
|
+++ w3lib-1.22.0/w3lib/url.py
|
||||||
|
@@ -5,17 +5,28 @@ library.
|
||||||
|
import base64
|
||||||
|
import codecs
|
||||||
|
import os
|
||||||
|
-import re
|
||||||
|
import posixpath
|
||||||
|
-import warnings
|
||||||
|
+import re
|
||||||
|
import string
|
||||||
|
+import warnings
|
||||||
|
from collections import namedtuple
|
||||||
|
-import six
|
||||||
|
-from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit,
|
||||||
|
- urldefrag, urlencode, urlparse,
|
||||||
|
- quote, parse_qs, parse_qsl,
|
||||||
|
- ParseResult, unquote, urlunparse)
|
||||||
|
-from six.moves.urllib.request import pathname2url, url2pathname
|
||||||
|
+from urllib.parse import (
|
||||||
|
+ _coerce_args,
|
||||||
|
+ parse_qs,
|
||||||
|
+ parse_qsl,
|
||||||
|
+ ParseResult,
|
||||||
|
+ quote,
|
||||||
|
+ unquote,
|
||||||
|
+ unquote_to_bytes,
|
||||||
|
+ urldefrag,
|
||||||
|
+ urlencode,
|
||||||
|
+ urljoin,
|
||||||
|
+ urlparse,
|
||||||
|
+ urlsplit,
|
||||||
|
+ urlunparse,
|
||||||
|
+ urlunsplit,
|
||||||
|
+)
|
||||||
|
+from urllib.request import pathname2url, url2pathname
|
||||||
|
from w3lib.util import to_bytes, to_native_str, to_unicode
|
||||||
|
|
||||||
|
|
||||||
|
@@ -184,7 +195,7 @@ def url_query_cleaner(url, parameterlist
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
- if isinstance(parameterlist, (six.text_type, bytes)):
|
||||||
|
+ if isinstance(parameterlist, (str, bytes)):
|
||||||
|
parameterlist = [parameterlist]
|
||||||
|
url, fragment = urldefrag(url)
|
||||||
|
base, _, query = url.partition('?')
|
||||||
|
@@ -346,10 +357,7 @@ def parse_data_uri(uri):
|
||||||
|
# delimiters, but it makes parsing easier and should not affect
|
||||||
|
# well-formed URIs, as the delimiters used in this URI scheme are not
|
||||||
|
# allowed, percent-encoded or not, in tokens.
|
||||||
|
- if six.PY2:
|
||||||
|
- uri = unquote(uri)
|
||||||
|
- else:
|
||||||
|
- uri = unquote_to_bytes(uri)
|
||||||
|
+ uri = unquote_to_bytes(uri)
|
||||||
|
|
||||||
|
media_type = "text/plain"
|
||||||
|
media_type_params = {}
|
||||||
|
@@ -469,33 +477,32 @@ def canonicalize_url(url, keep_blank_val
|
||||||
|
# 1. decode query-string as UTF-8 (or keep raw bytes),
|
||||||
|
# sort values,
|
||||||
|
# and percent-encode them back
|
||||||
|
- if six.PY2:
|
||||||
|
- keyvals = parse_qsl(query, keep_blank_values)
|
||||||
|
- else:
|
||||||
|
- # Python3's urllib.parse.parse_qsl does not work as wanted
|
||||||
|
- # for percent-encoded characters that do not match passed encoding,
|
||||||
|
- # they get lost.
|
||||||
|
- #
|
||||||
|
- # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')]
|
||||||
|
- # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD),
|
||||||
|
- # instead of \xa3 that you get with Python2's parse_qsl)
|
||||||
|
- #
|
||||||
|
- # what we want here is to keep raw bytes, and percent encode them
|
||||||
|
- # so as to preserve whatever encoding what originally used.
|
||||||
|
- #
|
||||||
|
- # See https://tools.ietf.org/html/rfc3987#section-6.4:
|
||||||
|
- #
|
||||||
|
- # For example, it is possible to have a URI reference of
|
||||||
|
- # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the
|
||||||
|
- # document name is encoded in iso-8859-1 based on server settings, but
|
||||||
|
- # where the fragment identifier is encoded in UTF-8 according to
|
||||||
|
- # [XPointer]. The IRI corresponding to the above URI would be (in XML
|
||||||
|
- # notation)
|
||||||
|
- # "http://www.example.org/r%E9sum%E9.xml#résumé".
|
||||||
|
- # Similar considerations apply to query parts. The functionality of
|
||||||
|
- # IRIs (namely, to be able to include non-ASCII characters) can only be
|
||||||
|
- # used if the query part is encoded in UTF-8.
|
||||||
|
- keyvals = parse_qsl_to_bytes(query, keep_blank_values)
|
||||||
|
+
|
||||||
|
+ # Python's urllib.parse.parse_qsl does not work as wanted
|
||||||
|
+ # for percent-encoded characters that do not match passed encoding,
|
||||||
|
+ # they get lost.
|
||||||
|
+ #
|
||||||
|
+ # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')]
|
||||||
|
+ # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD),
|
||||||
|
+ # instead of \xa3 that you get with Python2's parse_qsl)
|
||||||
|
+ #
|
||||||
|
+ # what we want here is to keep raw bytes, and percent encode them
|
||||||
|
+ # so as to preserve whatever encoding what originally used.
|
||||||
|
+ #
|
||||||
|
+ # See https://tools.ietf.org/html/rfc3987#section-6.4:
|
||||||
|
+ #
|
||||||
|
+ # For example, it is possible to have a URI reference of
|
||||||
|
+ # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the
|
||||||
|
+ # document name is encoded in iso-8859-1 based on server settings, but
|
||||||
|
+ # where the fragment identifier is encoded in UTF-8 according to
|
||||||
|
+ # [XPointer]. The IRI corresponding to the above URI would be (in XML
|
||||||
|
+ # notation)
|
||||||
|
+ # "http://www.example.org/r%E9sum%E9.xml#résumé".
|
||||||
|
+ # Similar considerations apply to query parts. The functionality of
|
||||||
|
+ # IRIs (namely, to be able to include non-ASCII characters) can only be
|
||||||
|
+ # used if the query part is encoded in UTF-8.
|
||||||
|
+ keyvals = parse_qsl_to_bytes(query, keep_blank_values)
|
||||||
|
+
|
||||||
|
keyvals.sort()
|
||||||
|
query = urlencode(keyvals)
|
||||||
|
|
||||||
|
@@ -519,17 +526,12 @@ def _unquotepath(path):
|
||||||
|
for reserved in ('2f', '2F', '3f', '3F'):
|
||||||
|
path = path.replace('%' + reserved, '%25' + reserved.upper())
|
||||||
|
|
||||||
|
- if six.PY2:
|
||||||
|
- # in Python 2, '%a3' becomes '\xa3', which is what we want
|
||||||
|
- return unquote(path)
|
||||||
|
- else:
|
||||||
|
- # in Python 3,
|
||||||
|
- # standard lib's unquote() does not work for non-UTF-8
|
||||||
|
- # percent-escaped characters, they get lost.
|
||||||
|
- # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
|
||||||
|
- #
|
||||||
|
- # unquote_to_bytes() returns raw bytes instead
|
||||||
|
- return unquote_to_bytes(path)
|
||||||
|
+ # standard lib's unquote() does not work for non-UTF-8
|
||||||
|
+ # percent-escaped characters, they get lost.
|
||||||
|
+ # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
|
||||||
|
+ #
|
||||||
|
+ # unquote_to_bytes() returns raw bytes instead
|
||||||
|
+ return unquote_to_bytes(path)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_url(url, encoding=None):
|
||||||
|
@@ -541,51 +543,48 @@ def parse_url(url, encoding=None):
|
||||||
|
return urlparse(to_unicode(url, encoding))
|
||||||
|
|
||||||
|
|
||||||
|
-if not six.PY2:
|
||||||
|
- from urllib.parse import _coerce_args, unquote_to_bytes
|
||||||
|
+def parse_qsl_to_bytes(qs, keep_blank_values=False):
|
||||||
|
+ """Parse a query given as a string argument.
|
||||||
|
+
|
||||||
|
+ Data are returned as a list of name, value pairs as bytes.
|
||||||
|
|
||||||
|
- def parse_qsl_to_bytes(qs, keep_blank_values=False):
|
||||||
|
- """Parse a query given as a string argument.
|
||||||
|
+ Arguments:
|
||||||
|
|
||||||
|
- Data are returned as a list of name, value pairs as bytes.
|
||||||
|
+ qs: percent-encoded query string to be parsed
|
||||||
|
|
||||||
|
- Arguments:
|
||||||
|
-
|
||||||
|
- qs: percent-encoded query string to be parsed
|
||||||
|
-
|
||||||
|
- keep_blank_values: flag indicating whether blank values in
|
||||||
|
- percent-encoded queries should be treated as blank strings. A
|
||||||
|
- true value indicates that blanks should be retained as blank
|
||||||
|
- strings. The default false value indicates that blank values
|
||||||
|
- are to be ignored and treated as if they were not included.
|
||||||
|
-
|
||||||
|
- """
|
||||||
|
- # This code is the same as Python3's parse_qsl()
|
||||||
|
- # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
|
||||||
|
- # except for the unquote(s, encoding, errors) calls replaced
|
||||||
|
- # with unquote_to_bytes(s)
|
||||||
|
- qs, _coerce_result = _coerce_args(qs)
|
||||||
|
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
|
- r = []
|
||||||
|
- for name_value in pairs:
|
||||||
|
- if not name_value:
|
||||||
|
+ keep_blank_values: flag indicating whether blank values in
|
||||||
|
+ percent-encoded queries should be treated as blank strings. A
|
||||||
|
+ true value indicates that blanks should be retained as blank
|
||||||
|
+ strings. The default false value indicates that blank values
|
||||||
|
+ are to be ignored and treated as if they were not included.
|
||||||
|
+
|
||||||
|
+ """
|
||||||
|
+ # This code is the same as Python3's parse_qsl()
|
||||||
|
+ # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
|
||||||
|
+ # except for the unquote(s, encoding, errors) calls replaced
|
||||||
|
+ # with unquote_to_bytes(s)
|
||||||
|
+ qs, _coerce_result = _coerce_args(qs)
|
||||||
|
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
|
+ r = []
|
||||||
|
+ for name_value in pairs:
|
||||||
|
+ if not name_value:
|
||||||
|
+ continue
|
||||||
|
+ nv = name_value.split('=', 1)
|
||||||
|
+ if len(nv) != 2:
|
||||||
|
+ # Handle case of a control-name with no equal sign
|
||||||
|
+ if keep_blank_values:
|
||||||
|
+ nv.append('')
|
||||||
|
+ else:
|
||||||
|
continue
|
||||||
|
- nv = name_value.split('=', 1)
|
||||||
|
- if len(nv) != 2:
|
||||||
|
- # Handle case of a control-name with no equal sign
|
||||||
|
- if keep_blank_values:
|
||||||
|
- nv.append('')
|
||||||
|
- else:
|
||||||
|
- continue
|
||||||
|
- if len(nv[1]) or keep_blank_values:
|
||||||
|
- name = nv[0].replace('+', ' ')
|
||||||
|
- name = unquote_to_bytes(name)
|
||||||
|
- name = _coerce_result(name)
|
||||||
|
- value = nv[1].replace('+', ' ')
|
||||||
|
- value = unquote_to_bytes(value)
|
||||||
|
- value = _coerce_result(value)
|
||||||
|
- r.append((name, value))
|
||||||
|
- return r
|
||||||
|
+ if len(nv[1]) or keep_blank_values:
|
||||||
|
+ name = nv[0].replace('+', ' ')
|
||||||
|
+ name = unquote_to_bytes(name)
|
||||||
|
+ name = _coerce_result(name)
|
||||||
|
+ value = nv[1].replace('+', ' ')
|
||||||
|
+ value = unquote_to_bytes(value)
|
||||||
|
+ value = _coerce_result(value)
|
||||||
|
+ r.append((name, value))
|
||||||
|
+ return r
|
||||||
|
|
||||||
|
|
||||||
|
def urljoin_rfc(base, ref, encoding='utf-8'):
|
||||||
|
Index: w3lib-1.22.0/w3lib/util.py
|
||||||
|
===================================================================
|
||||||
|
--- w3lib-1.22.0.orig/w3lib/util.py
|
||||||
|
+++ w3lib-1.22.0/w3lib/util.py
|
||||||
|
@@ -1,5 +1,3 @@
|
||||||
|
-import six
|
||||||
|
-
|
||||||
|
def str_to_unicode(text, encoding=None, errors='strict'):
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
@@ -10,16 +8,16 @@ def str_to_unicode(text, encoding=None,
|
||||||
|
def unicode_to_str(text, encoding=None, errors='strict'):
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf-8'
|
||||||
|
- if isinstance(text, six.text_type):
|
||||||
|
+ if isinstance(text, str):
|
||||||
|
return text.encode(encoding, errors)
|
||||||
|
return text
|
||||||
|
|
||||||
|
def to_unicode(text, encoding=None, errors='strict'):
|
||||||
|
"""Return the unicode representation of a bytes object `text`. If `text`
|
||||||
|
is already an unicode object, return it as-is."""
|
||||||
|
- if isinstance(text, six.text_type):
|
||||||
|
+ if isinstance(text, str):
|
||||||
|
return text
|
||||||
|
- if not isinstance(text, (bytes, six.text_type)):
|
||||||
|
+ if not isinstance(text, (bytes, str)):
|
||||||
|
raise TypeError('to_unicode must receive a bytes, str or unicode '
|
||||||
|
'object, got %s' % type(text).__name__)
|
||||||
|
if encoding is None:
|
||||||
|
@@ -31,7 +29,7 @@ def to_bytes(text, encoding=None, errors
|
||||||
|
is already a bytes object, return it as-is."""
|
||||||
|
if isinstance(text, bytes):
|
||||||
|
return text
|
||||||
|
- if not isinstance(text, six.string_types):
|
||||||
|
+ if not isinstance(text, str):
|
||||||
|
raise TypeError('to_bytes must receive a unicode, str or bytes '
|
||||||
|
'object, got %s' % type(text).__name__)
|
||||||
|
if encoding is None:
|
||||||
|
@@ -39,9 +37,5 @@ def to_bytes(text, encoding=None, errors
|
||||||
|
return text.encode(encoding, errors)
|
||||||
|
|
||||||
|
def to_native_str(text, encoding=None, errors='strict'):
|
||||||
|
- """ Return str representation of `text`
|
||||||
|
- (bytes in Python 2.x and unicode in Python 3.x). """
|
||||||
|
- if six.PY2:
|
||||||
|
- return to_bytes(text, encoding, errors)
|
||||||
|
- else:
|
||||||
|
- return to_unicode(text, encoding, errors)
|
||||||
|
+ """ Return str representation of `text` """
|
||||||
|
+ return to_unicode(text, encoding, errors)
|
||||||
@@ -1,3 +1,11 @@
|
|||||||
|
-------------------------------------------------------------------
|
||||||
|
Fri Oct 14 19:10:33 UTC 2022 - pgajdos@suse.com
|
||||||
|
|
||||||
|
- do not require six
|
||||||
|
- added patches
|
||||||
|
https://github.com/scrapy/w3lib/commit/c16d7bac3af3148b7018c67ef7922a5da6b3e640
|
||||||
|
+ python-w3lib-no-six.patch
|
||||||
|
|
||||||
-------------------------------------------------------------------
|
-------------------------------------------------------------------
|
||||||
Fri Mar 19 14:28:28 UTC 2021 - Matej Cepl <mcepl@suse.com>
|
Fri Mar 19 14:28:28 UTC 2021 - Matej Cepl <mcepl@suse.com>
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#
|
#
|
||||||
# spec file for package python-w3lib
|
# spec file for package python-w3lib
|
||||||
#
|
#
|
||||||
# Copyright (c) 2021 SUSE LLC
|
# Copyright (c) 2022 SUSE LLC
|
||||||
#
|
#
|
||||||
# All modifications and additions to the file contributed by third parties
|
# All modifications and additions to the file contributed by third parties
|
||||||
# remain the property of their copyright owners, unless otherwise agreed
|
# remain the property of their copyright owners, unless otherwise agreed
|
||||||
@@ -28,9 +28,10 @@ Source: https://files.pythonhosted.org/packages/source/w/w3lib/w3lib-%{v
|
|||||||
# PATCH-FIX-UPSTREAM 166-add-xfail-test_add_or_replace_parameter_fail.patch mcepl@suse.com
|
# PATCH-FIX-UPSTREAM 166-add-xfail-test_add_or_replace_parameter_fail.patch mcepl@suse.com
|
||||||
# Allow working with Python fixed CVE-2021-23336
|
# Allow working with Python fixed CVE-2021-23336
|
||||||
Patch0: 166-add-xfail-test_add_or_replace_parameter_fail.patch
|
Patch0: 166-add-xfail-test_add_or_replace_parameter_fail.patch
|
||||||
|
# https://github.com/scrapy/w3lib/commit/c16d7bac3af3148b7018c67ef7922a5da6b3e640
|
||||||
|
Patch1: python-w3lib-no-six.patch
|
||||||
BuildRequires: %{python_module pytest}
|
BuildRequires: %{python_module pytest}
|
||||||
BuildRequires: %{python_module setuptools}
|
BuildRequires: %{python_module setuptools}
|
||||||
BuildRequires: %{python_module six >= 1.4.1}
|
|
||||||
BuildRequires: fdupes
|
BuildRequires: fdupes
|
||||||
BuildRequires: python-rpm-macros
|
BuildRequires: python-rpm-macros
|
||||||
BuildArch: noarch
|
BuildArch: noarch
|
||||||
|
|||||||
Reference in New Issue
Block a user