From 7702237bf23f2ceab90a0ca92bb39ecf114613e856effd97e997b8197ad478af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=C3=A9ta=20Machov=C3=A1?= Date: Mon, 17 Oct 2022 13:09:22 +0000 Subject: [PATCH] Accepting request 1011020 from home:pgajdos:python - do not require six - added patches https://github.com/scrapy/w3lib/commit/c16d7bac3af3148b7018c67ef7922a5da6b3e640 + python-w3lib-no-six.patch OBS-URL: https://build.opensuse.org/request/show/1011020 OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-w3lib?expand=0&rev=20 --- python-w3lib-no-six.patch | 641 ++++++++++++++++++++++++++++++++++++++ python-w3lib.changes | 8 + python-w3lib.spec | 5 +- 3 files changed, 652 insertions(+), 2 deletions(-) create mode 100644 python-w3lib-no-six.patch diff --git a/python-w3lib-no-six.patch b/python-w3lib-no-six.patch new file mode 100644 index 0000000..1cf0b59 --- /dev/null +++ b/python-w3lib-no-six.patch @@ -0,0 +1,641 @@ +Index: w3lib-1.22.0/setup.py +=================================================================== +--- w3lib-1.22.0.orig/setup.py ++++ w3lib-1.22.0/setup.py +@@ -29,5 +29,4 @@ setup( + 'Programming Language :: Python :: Implementation :: PyPy', + 'Topic :: Internet :: WWW/HTTP', + ], +- install_requires=['six >= 1.4.1'], + ) +Index: w3lib-1.22.0/tests/test_encoding.py +=================================================================== +--- w3lib-1.22.0.orig/tests/test_encoding.py ++++ w3lib-1.22.0/tests/test_encoding.py +@@ -1,7 +1,14 @@ +-import unittest, codecs +-import six +-from w3lib.encoding import (html_body_declared_encoding, read_bom, to_unicode, +- http_content_type_encoding, resolve_encoding, html_to_unicode) ++import codecs ++import unittest ++ ++from w3lib.encoding import ( ++ html_body_declared_encoding, ++ http_content_type_encoding, ++ html_to_unicode, ++ read_bom, ++ resolve_encoding, ++ to_unicode, ++) + + class RequestEncodingTests(unittest.TestCase): + utf8_fragments = [ +@@ -107,18 +114,18 @@ class HtmlConversionTests(unittest.TestC + original_string = unicode_string.encode('cp1251') + encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string) + # check body_as_unicode +- self.assertTrue(isinstance(body_unicode, six.text_type)) ++ self.assertTrue(isinstance(body_unicode, str)) + self.assertEqual(body_unicode, unicode_string) + + def _assert_encoding(self, content_type, body, expected_encoding, + expected_unicode): +- assert not isinstance(body, six.text_type) ++ assert not isinstance(body, str) + encoding, body_unicode = html_to_unicode(ct(content_type), body) +- self.assertTrue(isinstance(body_unicode, six.text_type)) ++ self.assertTrue(isinstance(body_unicode, str)) + self.assertEqual(norm_encoding(encoding), + norm_encoding(expected_encoding)) + +- if isinstance(expected_unicode, six.string_types): ++ if isinstance(expected_unicode, str): + self.assertEqual(body_unicode, expected_unicode) + else: + self.assertTrue( +@@ -177,9 +184,9 @@ class HtmlConversionTests(unittest.TestC + + def _assert_encoding_detected(self, content_type, expected_encoding, body, + **kwargs): +- assert not isinstance(body, six.text_type) ++ assert not isinstance(body, str) + encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs) +- self.assertTrue(isinstance(body_unicode, six.text_type)) ++ self.assertTrue(isinstance(body_unicode, str)) + self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding)) + + def test_BOM(self): +Index: w3lib-1.22.0/tests/test_html.py +=================================================================== +--- w3lib-1.22.0.orig/tests/test_html.py ++++ w3lib-1.22.0/tests/test_html.py +@@ -1,18 +1,25 @@ +-# -*- coding: utf-8 -*- + import unittest +-import six +-from w3lib.html import (replace_entities, replace_tags, remove_comments, +- remove_tags_with_content, replace_escape_chars, remove_tags, unquote_markup, +- get_base_url, get_meta_refresh) ++ ++from w3lib.html import ( ++ get_base_url, ++ get_meta_refresh, ++ remove_comments, ++ remove_tags, ++ remove_tags_with_content, ++ replace_entities, ++ replace_escape_chars, ++ replace_tags, ++ unquote_markup, ++) + + + class RemoveEntitiesTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return uncode +- assert isinstance(replace_entities(b'no entities'), six.text_type) +- assert isinstance(replace_entities(b'Price: £100!'), six.text_type) +- assert isinstance(replace_entities(u'no entities'), six.text_type) +- assert isinstance(replace_entities(u'Price: £100!'), six.text_type) ++ assert isinstance(replace_entities(b'no entities'), str) ++ assert isinstance(replace_entities(b'Price: £100!'), str) ++ assert isinstance(replace_entities(u'no entities'), str) ++ assert isinstance(replace_entities(u'Price: £100!'), str) + + def test_regular(self): + # regular conversions +@@ -71,8 +78,8 @@ class RemoveEntitiesTest(unittest.TestCa + class ReplaceTagsTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return uncode +- assert isinstance(replace_tags(b'no entities'), six.text_type) +- assert isinstance(replace_tags('no entities'), six.text_type) ++ assert isinstance(replace_tags(b'no entities'), str) ++ assert isinstance(replace_tags('no entities'), str) + + def test_replace_tags(self): + self.assertEqual(replace_tags(u'This text contains some tag'), +@@ -88,10 +95,10 @@ class ReplaceTagsTest(unittest.TestCase) + class RemoveCommentsTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return unicode +- assert isinstance(remove_comments(b'without comments'), six.text_type) +- assert isinstance(remove_comments(b''), six.text_type) +- assert isinstance(remove_comments(u'without comments'), six.text_type) +- assert isinstance(remove_comments(u''), six.text_type) ++ assert isinstance(remove_comments(b'without comments'), str) ++ assert isinstance(remove_comments(b''), str) ++ assert isinstance(remove_comments(u'without comments'), str) ++ assert isinstance(remove_comments(u''), str) + + def test_no_comments(self): + # text without comments +@@ -112,16 +119,16 @@ class RemoveCommentsTest(unittest.TestCa + class RemoveTagsTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return unicode +- assert isinstance(remove_tags(b'no tags'), six.text_type) +- assert isinstance(remove_tags(b'no tags', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags(b'

one tag

'), six.text_type) +- assert isinstance(remove_tags(b'

one tag

', which_ones=('p')), six.text_type) +- assert isinstance(remove_tags(b'link', which_ones=('b',)), six.text_type) +- assert isinstance(remove_tags(u'no tags'), six.text_type) +- assert isinstance(remove_tags(u'no tags', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags(u'

one tag

'), six.text_type) +- assert isinstance(remove_tags(u'

one tag

', which_ones=('p')), six.text_type) +- assert isinstance(remove_tags(u'link', which_ones=('b',)), six.text_type) ++ assert isinstance(remove_tags(b'no tags'), str) ++ assert isinstance(remove_tags(b'no tags', which_ones=('p',)), str) ++ assert isinstance(remove_tags(b'

one tag

'), str) ++ assert isinstance(remove_tags(b'

one tag

', which_ones=('p')), str) ++ assert isinstance(remove_tags(b'link', which_ones=('b',)), str) ++ assert isinstance(remove_tags(u'no tags'), str) ++ assert isinstance(remove_tags(u'no tags', which_ones=('p',)), str) ++ assert isinstance(remove_tags(u'

one tag

'), str) ++ assert isinstance(remove_tags(u'

one tag

', which_ones=('p')), str) ++ assert isinstance(remove_tags(u'link', which_ones=('b',)), str) + + def test_remove_tags_without_tags(self): + # text without tags +@@ -160,14 +167,14 @@ class RemoveTagsTest(unittest.TestCase): + class RemoveTagsWithContentTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return unicode +- assert isinstance(remove_tags_with_content(b'no tags'), six.text_type) +- assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags_with_content(b'

one tag

', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags_with_content(b'link', which_ones=('b',)), six.text_type) +- assert isinstance(remove_tags_with_content(u'no tags'), six.text_type) +- assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags_with_content(u'

one tag

', which_ones=('p',)), six.text_type) +- assert isinstance(remove_tags_with_content(u'link', which_ones=('b',)), six.text_type) ++ assert isinstance(remove_tags_with_content(b'no tags'), str) ++ assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), str) ++ assert isinstance(remove_tags_with_content(b'

one tag

', which_ones=('p',)), str) ++ assert isinstance(remove_tags_with_content(b'link', which_ones=('b',)), str) ++ assert isinstance(remove_tags_with_content(u'no tags'), str) ++ assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), str) ++ assert isinstance(remove_tags_with_content(u'

one tag

', which_ones=('p',)), str) ++ assert isinstance(remove_tags_with_content(u'link', which_ones=('b',)), str) + + def test_without_tags(self): + # text without tags +@@ -194,13 +201,13 @@ class RemoveTagsWithContentTest(unittest + class ReplaceEscapeCharsTest(unittest.TestCase): + def test_returns_unicode(self): + # make sure it always return unicode +- assert isinstance(replace_escape_chars(b'no ec'), six.text_type) +- assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), six.text_type) +- assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), six.text_type) +- assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), six.text_type) +- assert isinstance(replace_escape_chars(u'no ec'), six.text_type) +- assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), six.text_type) +- assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), six.text_type) ++ assert isinstance(replace_escape_chars(b'no ec'), str) ++ assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), str) ++ assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), str) ++ assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), str) ++ assert isinstance(replace_escape_chars(u'no ec'), str) ++ assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), str) ++ assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), str) + + def test_without_escape_chars(self): + # text without escape chars +@@ -226,8 +233,8 @@ class UnquoteMarkupTest(unittest.TestCas + + def test_returns_unicode(self): + # make sure it always return unicode +- assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), six.text_type) +- assert isinstance(unquote_markup(self.sample_txt2), six.text_type) ++ assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), str) ++ assert isinstance(unquote_markup(self.sample_txt2), str) + + def test_unquote_markup(self): + self.assertEqual(unquote_markup(self.sample_txt1), u"""hi, this is sample text with entities: & \xa9 +Index: w3lib-1.22.0/tests/test_url.py +=================================================================== +--- w3lib-1.22.0.orig/tests/test_url.py ++++ w3lib-1.22.0/tests/test_url.py +@@ -1,15 +1,25 @@ +-# -*- coding: utf-8 -*- +-from __future__ import absolute_import + import os + import unittest ++from urllib.parse import urlparse + + import pytest +-from six.moves.urllib.parse import urlparse + +-from w3lib.url import (is_url, safe_url_string, safe_download_url, +- url_query_parameter, add_or_replace_parameter, url_query_cleaner, +- file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri, +- urljoin_rfc, canonicalize_url, parse_url, add_or_replace_parameters) ++from w3lib.url import ( ++ add_or_replace_parameter, ++ add_or_replace_parameters, ++ any_to_uri, ++ canonicalize_url, ++ file_uri_to_path, ++ is_url, ++ parse_data_uri, ++ parse_url, ++ path_to_file_uri, ++ safe_download_url, ++ safe_url_string, ++ url_query_parameter, ++ url_query_cleaner, ++ urljoin_rfc, ++) + + + class UrlTests(unittest.TestCase): +Index: w3lib-1.22.0/w3lib/form.py +=================================================================== +--- w3lib-1.22.0.orig/w3lib/form.py ++++ w3lib-1.22.0/w3lib/form.py +@@ -1,9 +1,6 @@ + import warnings +-import six +-if six.PY2: +- from cStringIO import StringIO as BytesIO +-else: +- from io import BytesIO ++from io import BytesIO ++ + from w3lib.util import unicode_to_str + + +Index: w3lib-1.22.0/w3lib/html.py +=================================================================== +--- w3lib-1.22.0.orig/w3lib/html.py ++++ w3lib-1.22.0/w3lib/html.py +@@ -5,16 +5,16 @@ Functions for dealing with markup text + + import warnings + import re +-import six +-from six import moves ++from html.entities import name2codepoint ++from urllib.parse import urljoin + + from w3lib.util import to_bytes, to_unicode + from w3lib.url import safe_url_string + + _ent_re = re.compile(r'&((?P[a-z\d]+)|#(?P\d+)|#x(?P[a-f\d]+))(?P;?)', re.IGNORECASE) + _tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL) +-_baseurl_re = re.compile(six.u(r']*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']'), re.I) +-_meta_refresh_re = re.compile(six.u(r']*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P["\'])(?P(\d*\.)?\d+)\s*;\s*url=\s*(?P.*?)(?P=quote)'), re.DOTALL | re.IGNORECASE) ++_baseurl_re = re.compile(r']*href\s*=\s*[\"\']\s*([^\"\'\s]+)\s*[\"\']', re.I) ++_meta_refresh_re = re.compile(r']*http-equiv[^>]*refresh[^>]*content\s*=\s*(?P["\'])(?P(\d*\.)?\d+)\s*;\s*url=\s*(?P.*?)(?P=quote)', re.DOTALL | re.IGNORECASE) + _cdata_re = re.compile(r'((?P.*?)(?P\]\]>))', re.DOTALL) + + HTML5_WHITESPACE = ' \t\n\r\x0c' +@@ -77,8 +77,10 @@ def replace_entities(text, keep=(), remo + if entity_name.lower() in keep: + return m.group(0) + else: +- number = (moves.html_entities.name2codepoint.get(entity_name) or +- moves.html_entities.name2codepoint.get(entity_name.lower())) ++ number = ( ++ name2codepoint.get(entity_name) ++ or name2codepoint.get(entity_name.lower()) ++ ) + if number is not None: + # Numeric character references in the 80-9F range are typically + # interpreted by browsers as representing the characters mapped +@@ -86,9 +88,9 @@ def replace_entities(text, keep=(), remo + # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML + try: + if 0x80 <= number <= 0x9f: +- return six.int2byte(number).decode('cp1252') ++ return bytes((number,)).decode('cp1252') + else: +- return six.unichr(number) ++ return chr(number) + except ValueError: + pass + +@@ -265,7 +267,7 @@ def unquote_markup(text, keep=(), remove + text = to_unicode(text, encoding) + ret_text = u'' + for fragment in _get_fragments(text, _cdata_re): +- if isinstance(fragment, six.string_types): ++ if isinstance(fragment, str): + # it's not a CDATA (so we try to remove its entities) + ret_text += replace_entities(fragment, keep=keep, remove_illegal=remove_illegal) + else: +@@ -284,7 +286,7 @@ def get_base_url(text, baseurl='', encod + text = to_unicode(text, encoding) + m = _baseurl_re.search(text) + if m: +- return moves.urllib.parse.urljoin( ++ return urljoin( + safe_url_string(baseurl), + safe_url_string(m.group(1), encoding=encoding) + ) +@@ -301,8 +303,6 @@ def get_meta_refresh(text, baseurl='', e + + """ + +- if six.PY2: +- baseurl = to_bytes(baseurl, encoding) + try: + text = to_unicode(text, encoding) + except UnicodeDecodeError: +@@ -314,7 +314,7 @@ def get_meta_refresh(text, baseurl='', e + if m: + interval = float(m.group('int')) + url = safe_url_string(m.group('url').strip(' "\''), encoding) +- url = moves.urllib.parse.urljoin(baseurl, url) ++ url = urljoin(baseurl, url) + return interval, url + else: + return None, None +Index: w3lib-1.22.0/w3lib/url.py +=================================================================== +--- w3lib-1.22.0.orig/w3lib/url.py ++++ w3lib-1.22.0/w3lib/url.py +@@ -5,17 +5,28 @@ library. + import base64 + import codecs + import os +-import re + import posixpath +-import warnings ++import re + import string ++import warnings + from collections import namedtuple +-import six +-from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit, +- urldefrag, urlencode, urlparse, +- quote, parse_qs, parse_qsl, +- ParseResult, unquote, urlunparse) +-from six.moves.urllib.request import pathname2url, url2pathname ++from urllib.parse import ( ++ _coerce_args, ++ parse_qs, ++ parse_qsl, ++ ParseResult, ++ quote, ++ unquote, ++ unquote_to_bytes, ++ urldefrag, ++ urlencode, ++ urljoin, ++ urlparse, ++ urlsplit, ++ urlunparse, ++ urlunsplit, ++) ++from urllib.request import pathname2url, url2pathname + from w3lib.util import to_bytes, to_native_str, to_unicode + + +@@ -184,7 +195,7 @@ def url_query_cleaner(url, parameterlist + + """ + +- if isinstance(parameterlist, (six.text_type, bytes)): ++ if isinstance(parameterlist, (str, bytes)): + parameterlist = [parameterlist] + url, fragment = urldefrag(url) + base, _, query = url.partition('?') +@@ -346,10 +357,7 @@ def parse_data_uri(uri): + # delimiters, but it makes parsing easier and should not affect + # well-formed URIs, as the delimiters used in this URI scheme are not + # allowed, percent-encoded or not, in tokens. +- if six.PY2: +- uri = unquote(uri) +- else: +- uri = unquote_to_bytes(uri) ++ uri = unquote_to_bytes(uri) + + media_type = "text/plain" + media_type_params = {} +@@ -469,33 +477,32 @@ def canonicalize_url(url, keep_blank_val + # 1. decode query-string as UTF-8 (or keep raw bytes), + # sort values, + # and percent-encode them back +- if six.PY2: +- keyvals = parse_qsl(query, keep_blank_values) +- else: +- # Python3's urllib.parse.parse_qsl does not work as wanted +- # for percent-encoded characters that do not match passed encoding, +- # they get lost. +- # +- # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')] +- # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD), +- # instead of \xa3 that you get with Python2's parse_qsl) +- # +- # what we want here is to keep raw bytes, and percent encode them +- # so as to preserve whatever encoding what originally used. +- # +- # See https://tools.ietf.org/html/rfc3987#section-6.4: +- # +- # For example, it is possible to have a URI reference of +- # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the +- # document name is encoded in iso-8859-1 based on server settings, but +- # where the fragment identifier is encoded in UTF-8 according to +- # [XPointer]. The IRI corresponding to the above URI would be (in XML +- # notation) +- # "http://www.example.org/r%E9sum%E9.xml#résumé". +- # Similar considerations apply to query parts. The functionality of +- # IRIs (namely, to be able to include non-ASCII characters) can only be +- # used if the query part is encoded in UTF-8. +- keyvals = parse_qsl_to_bytes(query, keep_blank_values) ++ ++ # Python's urllib.parse.parse_qsl does not work as wanted ++ # for percent-encoded characters that do not match passed encoding, ++ # they get lost. ++ # ++ # e.g., 'q=b%a3' becomes [('q', 'b\ufffd')] ++ # (ie. with 'REPLACEMENT CHARACTER' (U+FFFD), ++ # instead of \xa3 that you get with Python2's parse_qsl) ++ # ++ # what we want here is to keep raw bytes, and percent encode them ++ # so as to preserve whatever encoding what originally used. ++ # ++ # See https://tools.ietf.org/html/rfc3987#section-6.4: ++ # ++ # For example, it is possible to have a URI reference of ++ # "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the ++ # document name is encoded in iso-8859-1 based on server settings, but ++ # where the fragment identifier is encoded in UTF-8 according to ++ # [XPointer]. The IRI corresponding to the above URI would be (in XML ++ # notation) ++ # "http://www.example.org/r%E9sum%E9.xml#résumé". ++ # Similar considerations apply to query parts. The functionality of ++ # IRIs (namely, to be able to include non-ASCII characters) can only be ++ # used if the query part is encoded in UTF-8. ++ keyvals = parse_qsl_to_bytes(query, keep_blank_values) ++ + keyvals.sort() + query = urlencode(keyvals) + +@@ -519,17 +526,12 @@ def _unquotepath(path): + for reserved in ('2f', '2F', '3f', '3F'): + path = path.replace('%' + reserved, '%25' + reserved.upper()) + +- if six.PY2: +- # in Python 2, '%a3' becomes '\xa3', which is what we want +- return unquote(path) +- else: +- # in Python 3, +- # standard lib's unquote() does not work for non-UTF-8 +- # percent-escaped characters, they get lost. +- # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD) +- # +- # unquote_to_bytes() returns raw bytes instead +- return unquote_to_bytes(path) ++ # standard lib's unquote() does not work for non-UTF-8 ++ # percent-escaped characters, they get lost. ++ # e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD) ++ # ++ # unquote_to_bytes() returns raw bytes instead ++ return unquote_to_bytes(path) + + + def parse_url(url, encoding=None): +@@ -541,51 +543,48 @@ def parse_url(url, encoding=None): + return urlparse(to_unicode(url, encoding)) + + +-if not six.PY2: +- from urllib.parse import _coerce_args, unquote_to_bytes ++def parse_qsl_to_bytes(qs, keep_blank_values=False): ++ """Parse a query given as a string argument. ++ ++ Data are returned as a list of name, value pairs as bytes. + +- def parse_qsl_to_bytes(qs, keep_blank_values=False): +- """Parse a query given as a string argument. ++ Arguments: + +- Data are returned as a list of name, value pairs as bytes. ++ qs: percent-encoded query string to be parsed + +- Arguments: +- +- qs: percent-encoded query string to be parsed +- +- keep_blank_values: flag indicating whether blank values in +- percent-encoded queries should be treated as blank strings. A +- true value indicates that blanks should be retained as blank +- strings. The default false value indicates that blank values +- are to be ignored and treated as if they were not included. +- +- """ +- # This code is the same as Python3's parse_qsl() +- # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a) +- # except for the unquote(s, encoding, errors) calls replaced +- # with unquote_to_bytes(s) +- qs, _coerce_result = _coerce_args(qs) +- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] +- r = [] +- for name_value in pairs: +- if not name_value: ++ keep_blank_values: flag indicating whether blank values in ++ percent-encoded queries should be treated as blank strings. A ++ true value indicates that blanks should be retained as blank ++ strings. The default false value indicates that blank values ++ are to be ignored and treated as if they were not included. ++ ++ """ ++ # This code is the same as Python3's parse_qsl() ++ # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a) ++ # except for the unquote(s, encoding, errors) calls replaced ++ # with unquote_to_bytes(s) ++ qs, _coerce_result = _coerce_args(qs) ++ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] ++ r = [] ++ for name_value in pairs: ++ if not name_value: ++ continue ++ nv = name_value.split('=', 1) ++ if len(nv) != 2: ++ # Handle case of a control-name with no equal sign ++ if keep_blank_values: ++ nv.append('') ++ else: + continue +- nv = name_value.split('=', 1) +- if len(nv) != 2: +- # Handle case of a control-name with no equal sign +- if keep_blank_values: +- nv.append('') +- else: +- continue +- if len(nv[1]) or keep_blank_values: +- name = nv[0].replace('+', ' ') +- name = unquote_to_bytes(name) +- name = _coerce_result(name) +- value = nv[1].replace('+', ' ') +- value = unquote_to_bytes(value) +- value = _coerce_result(value) +- r.append((name, value)) +- return r ++ if len(nv[1]) or keep_blank_values: ++ name = nv[0].replace('+', ' ') ++ name = unquote_to_bytes(name) ++ name = _coerce_result(name) ++ value = nv[1].replace('+', ' ') ++ value = unquote_to_bytes(value) ++ value = _coerce_result(value) ++ r.append((name, value)) ++ return r + + + def urljoin_rfc(base, ref, encoding='utf-8'): +Index: w3lib-1.22.0/w3lib/util.py +=================================================================== +--- w3lib-1.22.0.orig/w3lib/util.py ++++ w3lib-1.22.0/w3lib/util.py +@@ -1,5 +1,3 @@ +-import six +- + def str_to_unicode(text, encoding=None, errors='strict'): + if encoding is None: + encoding = 'utf-8' +@@ -10,16 +8,16 @@ def str_to_unicode(text, encoding=None, + def unicode_to_str(text, encoding=None, errors='strict'): + if encoding is None: + encoding = 'utf-8' +- if isinstance(text, six.text_type): ++ if isinstance(text, str): + return text.encode(encoding, errors) + return text + + def to_unicode(text, encoding=None, errors='strict'): + """Return the unicode representation of a bytes object `text`. If `text` + is already an unicode object, return it as-is.""" +- if isinstance(text, six.text_type): ++ if isinstance(text, str): + return text +- if not isinstance(text, (bytes, six.text_type)): ++ if not isinstance(text, (bytes, str)): + raise TypeError('to_unicode must receive a bytes, str or unicode ' + 'object, got %s' % type(text).__name__) + if encoding is None: +@@ -31,7 +29,7 @@ def to_bytes(text, encoding=None, errors + is already a bytes object, return it as-is.""" + if isinstance(text, bytes): + return text +- if not isinstance(text, six.string_types): ++ if not isinstance(text, str): + raise TypeError('to_bytes must receive a unicode, str or bytes ' + 'object, got %s' % type(text).__name__) + if encoding is None: +@@ -39,9 +37,5 @@ def to_bytes(text, encoding=None, errors + return text.encode(encoding, errors) + + def to_native_str(text, encoding=None, errors='strict'): +- """ Return str representation of `text` +- (bytes in Python 2.x and unicode in Python 3.x). """ +- if six.PY2: +- return to_bytes(text, encoding, errors) +- else: +- return to_unicode(text, encoding, errors) ++ """ Return str representation of `text` """ ++ return to_unicode(text, encoding, errors) diff --git a/python-w3lib.changes b/python-w3lib.changes index f5d0722..a25e5ea 100644 --- a/python-w3lib.changes +++ b/python-w3lib.changes @@ -1,3 +1,11 @@ +------------------------------------------------------------------- +Fri Oct 14 19:10:33 UTC 2022 - pgajdos@suse.com + +- do not require six +- added patches + https://github.com/scrapy/w3lib/commit/c16d7bac3af3148b7018c67ef7922a5da6b3e640 + + python-w3lib-no-six.patch + ------------------------------------------------------------------- Fri Mar 19 14:28:28 UTC 2021 - Matej Cepl diff --git a/python-w3lib.spec b/python-w3lib.spec index 76bcf6c..3cff738 100644 --- a/python-w3lib.spec +++ b/python-w3lib.spec @@ -1,7 +1,7 @@ # # spec file for package python-w3lib # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -28,9 +28,10 @@ Source: https://files.pythonhosted.org/packages/source/w/w3lib/w3lib-%{v # PATCH-FIX-UPSTREAM 166-add-xfail-test_add_or_replace_parameter_fail.patch mcepl@suse.com # Allow working with Python fixed CVE-2021-23336 Patch0: 166-add-xfail-test_add_or_replace_parameter_fail.patch +# https://github.com/scrapy/w3lib/commit/c16d7bac3af3148b7018c67ef7922a5da6b3e640 +Patch1: python-w3lib-no-six.patch BuildRequires: %{python_module pytest} BuildRequires: %{python_module setuptools} -BuildRequires: %{python_module six >= 1.4.1} BuildRequires: fdupes BuildRequires: python-rpm-macros BuildArch: noarch