Accepting request 679030 from home:mcepl:branches:devel:languages:python

- Update to the upstream version 4.0.0:
  * Port to Python 3 rocket
  * Add curl_obj option to grabber
  * Throw an obvious error message when urlgrabber-ext-down is
    missing when attempting to use external downloader
  * Use setuptools for setup.py instead of distutils

OBS-URL: https://build.opensuse.org/request/show/679030
OBS-URL: https://build.opensuse.org/package/show/devel:languages:python/python-urlgrabber?expand=0&rev=31
2019-02-25 18:00:58 +00:00
parent 526fbefc40
commit 4192a3124d
8 changed files with 87 additions and 282 deletions
--- a/declare-dollar-sign-as-safe-in-urlquote.patch
+++ b/declare-dollar-sign-as-safe-in-urlquote.patch
@@ -1,13 +0,0 @@
-Index: urlgrabber-3.9.1/urlgrabber/grabber.py
-===================================================================
--- urlgrabber-3.9.1.orig/urlgrabber/grabber.py
-+++ urlgrabber-3.9.1/urlgrabber/grabber.py
-@@ -715,7 +715,7 @@ class URLParser:
-         passing into urlgrabber.
-         """
-         (scheme, host, path, parm, query, frag) = parts
-        path = urllib.quote(path)
-+        path = urllib.quote(path, safe='/$')
-         return (scheme, host, path, parm, query, frag)
- 
-     hexvals = '0123456789ABCDEF'
--- a/grabber_fix.diff
+++ b/grabber_fix.diff
@@ -1,30 +1,8 @@
--- urlgrabber-3.9.1/urlgrabber/grabber.py.orig	2010-07-02 21:24:12.000000000 -0400
-+++ urlgrabber-3.9.1/urlgrabber/grabber.py	2010-07-02 20:30:25.000000000 -0400
-@@ -68,14 +68,14 @@
-     (which can be set on default_grabber.throttle) is used. See
-     BANDWIDTH THROTTLING for more information.
- 
-  timeout = None
-+  timeout = 300
- 
-    a positive float expressing the number of seconds to wait for socket
-    operations. If the value is None or 0.0, socket operations will block
-    forever. Setting this option causes urlgrabber to call the settimeout
-    method on the Socket object used for the request. See the Python
-    documentation on settimeout for more information.
-    http://www.python.org/doc/current/lib/socket-objects.html
-+    a positive integer expressing the number of seconds to wait before
-+    timing out attempts to connect to a server. If the value is None
-+    or 0, connection attempts will not time out. The timeout is passed
-+    to the underlying pycurl object as its CONNECTTIMEOUT option, see
-+    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
-+    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
- 
-   bandwidth = 0
- 
-@@ -439,6 +439,12 @@
- except:
-     __version__ = '???'
+--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
+@@ -594,6 +594,12 @@ def _urlunquote_convert(s):
+         s = s.decode('utf8')
+     return urlunquote(s)
 
 +try:
 +    # this part isn't going to do much - need to talk to gettext
@@ -33,129 +11,11 @@
 +    def _(st): return st
 +
 ########################################################################
- # functions for debugging output.  These functions are here because they
- # are also part of the module initialization.
-@@ -808,7 +814,7 @@
-         self.prefix = None
-         self.opener = None
-         self.cache_openers = True
-        self.timeout = None
-+        self.timeout = 300
-         self.text = None
-         self.http_headers = None
-         self.ftp_headers = None
-@@ -1052,9 +1058,15 @@
-         self._reget_length = 0
-         self._prog_running = False
-         self._error = (None, None)
-        self.size = None
-+        self.size = 0
-+        self._hdr_ended = False
-         self._do_open()
-         
-+
-+    def geturl(self):
-+        """ Provide the geturl() method, used to be got from
-+            urllib.addinfourl, via. urllib.URLopener.* """
-+        return self.url
-         
-     def __getattr__(self, name):
-         """This effectively allows us to wrap at the instance level.
-@@ -1085,9 +1097,14 @@
-             return -1
-             
-     def _hdr_retrieve(self, buf):
-+        if self._hdr_ended:
-+            self._hdr_dump = ''
-+            self.size = 0
-+            self._hdr_ended = False
-+
-         if self._over_max_size(cur=len(self._hdr_dump), 
-                                max_size=self.opts.max_header_size):
-            return -1            
-+            return -1
-         try:
-             self._hdr_dump += buf
-             # we have to get the size before we do the progress obj start
-@@ -1104,7 +1121,17 @@
-                     s = parse150(buf)
-                 if s:
-                     self.size = int(s)
-            
-+                    
-+            if buf.lower().find('location') != -1:
-+                location = ':'.join(buf.split(':')[1:])
-+                location = location.strip()
-+                self.scheme = urlparse.urlsplit(location)[0]
-+                self.url = location
-+                
-+            if len(self._hdr_dump) != 0 and buf == '\r\n':
-+                self._hdr_ended = True
-+                if DEBUG: DEBUG.info('header ended:')
-+                
-             return len(buf)
-         except KeyboardInterrupt:
-             return pycurl.READFUNC_ABORT
-@@ -1113,8 +1140,10 @@
-         if self._parsed_hdr:
-             return self._parsed_hdr
-         statusend = self._hdr_dump.find('\n')
-+        statusend += 1 # ridiculous as it may seem.
-         hdrfp = StringIO()
-         hdrfp.write(self._hdr_dump[statusend:])
-+        hdrfp.seek(0)
-         self._parsed_hdr =  mimetools.Message(hdrfp)
-         return self._parsed_hdr
-     
-@@ -1136,6 +1165,7 @@
-         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
-         self.curl_obj.setopt(pycurl.FAILONERROR, True)
-         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
-+        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
-         
-         if DEBUG:
-             self.curl_obj.setopt(pycurl.VERBOSE, True)
-@@ -1148,9 +1178,11 @@
-         
-         # timeouts
-         timeout = 300
-        if opts.timeout:
-            timeout = int(opts.timeout)
-            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
-+        if hasattr(opts, 'timeout'):
-+            timeout = int(opts.timeout or 0)
-+        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
-+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
-+        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
- 
-         # ssl options
-         if self.scheme == 'https':
-@@ -1276,7 +1308,7 @@
-                 raise err
- 
-             elif errcode == 60:
-                msg = _("client cert cannot be verified or client cert incorrect")
-+                msg = _("Peer cert cannot be verified or peer cert invalid")
-                 err = URLGrabError(14, msg)
-                 err.url = self.url
-                 raise err
-@@ -1291,7 +1323,12 @@
-                 raise err
-                     
-             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
-                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
-+                if self.scheme in ['http', 'https']:
-+                    msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
-+                elif self.scheme in ['ftp']:
-+                    msg = 'FTP Error %s : %s ' % (self.http_code, self.url)
-+                else:
-+                    msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)
-             else:
-                 msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
-                 code = errcode
-@@ -1299,6 +1336,12 @@
-             err.code = code
-             err.exception = e
+ #                     MODULE INITIALIZATION
+ ########################################################################
+@@ -1298,6 +1304,12 @@ class URLGrabber(object):
+                                _('Exceeded limit (%i): %s') % (limit, url))
+             err.url = url
             raise err
 +        else:
 +            if self._error[1]:
@@ -164,73 +24,64 @@
 +                err.url = self.url
 +                raise err
 
-     def _do_open(self):
-         self.curl_obj = _curl_cache
-@@ -1446,9 +1489,23 @@
-             # set the time
-             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
-             if mod_time != -1:
-                os.utime(self.filename, (mod_time, mod_time))
-+                try:
-+                    os.utime(self.filename, (mod_time, mod_time))
-+                except OSError, e:
-+                    err = URLGrabError(16, _(\
-+                      'error setting timestamp on file %s from %s, OSError: %s') 
-+                              % (self.filenameself.url, e))
-+                    err.url = self.url
-+                    raise err
-             # re open it
-            self.fo = open(self.filename, 'r')
-+            try:
-+                self.fo = open(self.filename, 'r')
-+            except IOError, e:
-+                err = URLGrabError(16, _(\
-+                  'error opening file from %s, IOError: %s') % (self.url, e))
-+                err.url = self.url
-+                raise err
-+                
-         else:
-             #self.fo = open(self._temp_name, 'r')
-             self.fo.seek(0)
-@@ -1532,11 +1589,14 @@
-     def _over_max_size(self, cur, max_size=None):
+         return s
 
-         if not max_size:
-            max_size = self.size
-        if self.opts.size: # if we set an opts size use that, no matter what
-            max_size = self.opts.size
-+            if not self.opts.size:
-+                max_size = self.size
-+            else:
-+                max_size = self.opts.size
-+
-         if not max_size: return False # if we have None for all of the Max then this is dumb
-        if cur > max_size + max_size*.10:
-+
-+        if cur > int(float(max_size) * 1.10):
+@@ -1342,6 +1354,10 @@ class PyCurlFileObject(object):
+         self._tm_last = None
+         self._do_open()
 
-             msg = _("Downloaded more than max size for %s: %s > %s") \
-                         % (self.url, cur, max_size)
-@@ -1582,9 +1642,21 @@
-             self.opts.progress_obj.end(self._amount_read)
-         self.fo.close()
-         
-
 +    def geturl(self):
 +        """ Provide the geturl() method, used to be got from
 +            urllib.addinfourl, via. urllib.URLopener.* """
 +        return self.url
-+        
- _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
 
-+def reset_curl_obj():
-+    """To make sure curl has reread the network/dns info we force a reload"""
-+    global _curl_cache
-+    _curl_cache.close()
-+    _curl_cache = pycurl.Curl()
-+
-+
-+    
+     def __getattr__(self, name):
+         """This effectively allows us to wrap at the instance level.
+@@ -1391,7 +1407,7 @@ class PyCurlFileObject(object):
 
- #####################################################################
- # DEPRECATED FUNCTIONS
+     def _hdr_retrieve(self, buf):
+         if self._hdr_ended:
+-            self._hdr_dump = b''
+            self._hdr_dump = ''
+             self.size = 0
+             self._hdr_ended = False
+ 
+@@ -1426,16 +1442,15 @@ class PyCurlFileObject(object):
+                 if s:
+                     self.size = int(s)
+ 
+-            if buf.lower().find(b'location') != -1:
+-                location = b':'.join(buf.split(b':')[1:])
+            if buf.lower().find('location') != -1:
+                location = ':'.join(buf.split(':')[1:])
+                 location = location.strip()
+                 self.scheme = urlparse.urlsplit(location)[0]
+                 self.url = location
+ 
+-            self._hdr_dump += buf
+-            if len(self._hdr_dump) != 0 and buf == b'\r\n':
+            if len(self._hdr_dump) != 0 and buf == '\r\n':
+                 self._hdr_ended = True
+-                if DEBUG: DEBUG.debug('header ended:')
+                if DEBUG: DEBUG.info('header ended:')
+ 
+             return len(buf)
+         except KeyboardInterrupt:
+@@ -1444,7 +1459,7 @@ class PyCurlFileObject(object):
+     def _return_hdr_obj(self):
+         if self._parsed_hdr:
+             return self._parsed_hdr
+-        statusend = self._hdr_dump.find(b'\n')
+        statusend = self._hdr_dump.find('\n')
+         statusend += 1 # ridiculous as it may seem.
+         hdrfp = StringIO()
+         hdrfp.write(self._hdr_dump[statusend:])
+@@ -1498,7 +1513,7 @@ class PyCurlFileObject(object):
+         if hasattr(opts, 'timeout'):
+             timeout = int(opts.timeout or 0)
+         self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+-        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, opts.minrate or 1000)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
+         self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
+ 
+         # ssl options
--- a/python-urlgrabber-3.9.1-preserve-queryparams-in-urls.patch
+++ b/python-urlgrabber-3.9.1-preserve-queryparams-in-urls.patch
@@ -1,28 +0,0 @@
-diff --unified -u -r urlgrabber-3.9.1.orig/urlgrabber/mirror.py urlgrabber-3.9.1/urlgrabber/mirror.py
--- urlgrabber-3.9.1.orig/urlgrabber/mirror.py	2014-09-16 14:44:54.582048746 +0200
-+++ urlgrabber-3.9.1/urlgrabber/mirror.py	2014-09-16 14:49:24.138034099 +0200
-@@ -88,6 +88,7 @@
- 
- 
- import random
-+import urlparse
- import thread  # needed for locking to make this threadsafe
- 
- from grabber import URLGrabError, CallbackObject, DEBUG
-@@ -366,11 +367,12 @@
-     # by overriding the configuration methods :)
- 
-     def _join_url(self, base_url, rel_url):
-        if base_url.endswith('/') or rel_url.startswith('/'):
-            return base_url + rel_url
-+        (scheme, netloc, path, query, fragid) = urlparse.urlsplit(base_url)
-+        if path.endswith('/') or rel_url.startswith('/'):
-+            return urlparse.urlunsplit((scheme, netloc, path + rel_url, query, fragid))
-         else:
-            return base_url + '/' + rel_url
-        
-+            return urlparse.urlunsplit((scheme, netloc, path + '/' + rel_url, query, fragid))
-+
-     def _mirror_try(self, func, url, kw):
-         gr = GrabRequest()
-         gr.func = func
--- a/python-urlgrabber-3.9.1-set-SSL_VERIFYHOST-correct.dif
+++ b/python-urlgrabber-3.9.1-set-SSL_VERIFYHOST-correct.dif
@@ -1,14 +0,0 @@
-Index: urlgrabber-3.9.1/urlgrabber/grabber.py
-===================================================================
--- urlgrabber-3.9.1.orig/urlgrabber/grabber.py
-+++ urlgrabber-3.9.1/urlgrabber/grabber.py
-@@ -1190,7 +1190,8 @@ class PyCurlFileObject():
-                 self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
-                 self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert)
-             self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer)
-            self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host)
-+            if opts.ssl_verify_host: # 1 is meaningless to curl
-+                self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, 2)
-             if opts.ssl_key:
-                 self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key)
-             if opts.ssl_key_type:
--- a/python-urlgrabber.changes
+++ b/python-urlgrabber.changes
@@ -1,3 +1,13 @@
+-------------------------------------------------------------------
+Mon Feb 25 17:44:43 CET 2019 - Matej Cepl <mcepl@suse.com>
+
+- Update to the upstream version 4.0.0:
+  * Port to Python 3 rocket
+  * Add curl_obj option to grabber
+  * Throw an obvious error message when urlgrabber-ext-down is
+    missing when attempting to use external downloader
+  * Use setuptools for setup.py instead of distutils
+
 -------------------------------------------------------------------
 Tue Dec  4 12:55:41 UTC 2018 - Matej Cepl <mcepl@suse.com>

--- a/python-urlgrabber.spec
+++ b/python-urlgrabber.spec
@@ -1,7 +1,7 @@
 #
 # spec file for package python-urlgrabber
 #
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,20 +19,20 @@
 %{!?python_sitelib: %global python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print get_python_lib()")}
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 %define skip_python3 1
+%define modname urlgrabber
+
 Name:           python-urlgrabber
-Version:        3.9.1
+Version:        4.0.0
 Release:        0
 Summary:        A high-level cross-protocol url-grabber
 License:        LGPL-2.1-only
 Group:          Development/Languages/Python
 URL:            http://urlgrabber.baseurl.org
-Source:         https://files.pythonhosted.org/packages/source/u/urlgrabber/urlgrabber-%{version}.tar.gz
+Source:         https://github.com/rpm-software-management/%{modname}/releases/download/%{modname}-4-0-0/%{modname}-%{version}.tar.gz
 Patch0:         grabber_fix.diff
-# PATCH-FIX-UPSTREAM bnc#896844
-Patch1:         python-urlgrabber-3.9.1-preserve-queryparams-in-urls.patch
-Patch2:         declare-dollar-sign-as-safe-in-urlquote.patch
-Patch3:         python-urlgrabber-3.9.1-set-SSL_VERIFYHOST-correct.dif
 BuildRequires:  %{python_module pycurl}
+BuildRequires:  %{python_module setuptools}
+BuildRequires:  %{python_module six}
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
 Requires:       python-pycurl
@@ -47,10 +47,7 @@ throttling, authentication, proxies and more.
 %prep
 %setup -q -n urlgrabber-%{version}
 sed -i "13d" urlgrabber/__init__.py # Remove wrong license header, fixes bnc#781323
-%patch0 -p1
-%patch1 -p1
-%patch2 -p1
-%patch3 -p1
+%autopatch -p1

 %build
 %python_build
@@ -58,6 +55,7 @@ sed -i "13d" urlgrabber/__init__.py # Remove wrong license header, fixes bnc#781
 %install
 %python_install
 rm -rf %{buildroot}%{_datadir}/doc/urlgrabber-%{version} # Remove wrongly installed docs
+mv -v %{buildroot}%{_usr}/libexec/urlgrabber-ext-down %{buildroot}%{_usr}/lib/urlgrabber-ext-down
 %python_expand %fdupes %{buildroot}%{$python_sitelib}

 %files %{python_files}
@@ -65,5 +63,6 @@ rm -rf %{buildroot}%{_datadir}/doc/urlgrabber-%{version} # Remove wrongly instal
 %doc ChangeLog README TODO
 %{_bindir}/urlgrabber
 %{python_sitelib}/*
+%{_usr}/lib/urlgrabber*

 %changelog
--- a/urlgrabber-3.9.1.tar.gz
+++ b/urlgrabber-3.9.1.tar.gz
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b4e276fa968c66671309a6d754c4b3b0cb2003dec8bca87a681378a22e0d3da7
-size 72071
--- a/urlgrabber-4.0.0.tar.gz
+++ b/urlgrabber-4.0.0.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d152d10c7decce45ce5b44f8ee8ee5fd0047217f4152eebb8e0c552ca5137af0
+size 86308