diff --git a/grabber_fix.diff b/grabber_fix.diff new file mode 100644 index 0000000..2cf3257 --- /dev/null +++ b/grabber_fix.diff @@ -0,0 +1,236 @@ +--- urlgrabber-3.9.1/urlgrabber/grabber.py.orig 2010-07-02 21:24:12.000000000 -0400 ++++ urlgrabber-3.9.1/urlgrabber/grabber.py 2010-07-02 20:30:25.000000000 -0400 +@@ -68,14 +68,14 @@ + (which can be set on default_grabber.throttle) is used. See + BANDWIDTH THROTTLING for more information. + +- timeout = None ++ timeout = 300 + +- a positive float expressing the number of seconds to wait for socket +- operations. If the value is None or 0.0, socket operations will block +- forever. Setting this option causes urlgrabber to call the settimeout +- method on the Socket object used for the request. See the Python +- documentation on settimeout for more information. +- http://www.python.org/doc/current/lib/socket-objects.html ++ a positive integer expressing the number of seconds to wait before ++ timing out attempts to connect to a server. If the value is None ++ or 0, connection attempts will not time out. The timeout is passed ++ to the underlying pycurl object as its CONNECTTIMEOUT option, see ++ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information. ++ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT + + bandwidth = 0 + +@@ -439,6 +439,12 @@ + except: + __version__ = '???' + ++try: ++ # this part isn't going to do much - need to talk to gettext ++ from i18n import _ ++except ImportError, msg: ++ def _(st): return st ++ + ######################################################################## + # functions for debugging output. These functions are here because they + # are also part of the module initialization. 
+@@ -808,7 +814,7 @@ + self.prefix = None + self.opener = None + self.cache_openers = True +- self.timeout = None ++ self.timeout = 300 + self.text = None + self.http_headers = None + self.ftp_headers = None +@@ -1052,9 +1058,15 @@ + self._reget_length = 0 + self._prog_running = False + self._error = (None, None) +- self.size = None ++ self.size = 0 ++ self._hdr_ended = False + self._do_open() + ++ ++ def geturl(self): ++ """ Provide the geturl() method, used to be got from ++ urllib.addinfourl, via. urllib.URLopener.* """ ++ return self.url + + def __getattr__(self, name): + """This effectively allows us to wrap at the instance level. +@@ -1085,9 +1097,14 @@ + return -1 + + def _hdr_retrieve(self, buf): ++ if self._hdr_ended: ++ self._hdr_dump = '' ++ self.size = 0 ++ self._hdr_ended = False ++ + if self._over_max_size(cur=len(self._hdr_dump), + max_size=self.opts.max_header_size): +- return -1 ++ return -1 + try: + self._hdr_dump += buf + # we have to get the size before we do the progress obj start +@@ -1104,7 +1121,17 @@ + s = parse150(buf) + if s: + self.size = int(s) +- ++ ++ if buf.lower().find('location') != -1: ++ location = ':'.join(buf.split(':')[1:]) ++ location = location.strip() ++ self.scheme = urlparse.urlsplit(location)[0] ++ self.url = location ++ ++ if len(self._hdr_dump) != 0 and buf == '\r\n': ++ self._hdr_ended = True ++ if DEBUG: DEBUG.info('header ended:') ++ + return len(buf) + except KeyboardInterrupt: + return pycurl.READFUNC_ABORT +@@ -1113,8 +1140,10 @@ + if self._parsed_hdr: + return self._parsed_hdr + statusend = self._hdr_dump.find('\n') ++ statusend += 1 # ridiculous as it may seem. 
+ hdrfp = StringIO() + hdrfp.write(self._hdr_dump[statusend:]) ++ hdrfp.seek(0) + self._parsed_hdr = mimetools.Message(hdrfp) + return self._parsed_hdr + +@@ -1136,6 +1165,7 @@ + self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) + self.curl_obj.setopt(pycurl.FAILONERROR, True) + self.curl_obj.setopt(pycurl.OPT_FILETIME, True) ++ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) + + if DEBUG: + self.curl_obj.setopt(pycurl.VERBOSE, True) +@@ -1148,9 +1178,11 @@ + + # timeouts + timeout = 300 +- if opts.timeout: +- timeout = int(opts.timeout) +- self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) ++ if hasattr(opts, 'timeout'): ++ timeout = int(opts.timeout or 0) ++ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) ++ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1) ++ self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout) + + # ssl options + if self.scheme == 'https': +@@ -1276,7 +1308,7 @@ + raise err + + elif errcode == 60: +- msg = _("client cert cannot be verified or client cert incorrect") ++ msg = _("Peer cert cannot be verified or peer cert invalid") + err = URLGrabError(14, msg) + err.url = self.url + raise err +@@ -1291,7 +1323,12 @@ + raise err + + elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it +- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) ++ if self.scheme in ['http', 'https']: ++ msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) ++ elif self.scheme in ['ftp']: ++ msg = 'FTP Error %s : %s ' % (self.http_code, self.url) ++ else: ++ msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme) + else: + msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1])) + code = errcode +@@ -1299,6 +1336,12 @@ + err.code = code + err.exception = e + raise err ++ else: ++ if self._error[1]: ++ msg = self._error[1] ++ err = URLGrabError(14, msg) ++ err.url = self.url ++ raise err + + def _do_open(self): + self.curl_obj = _curl_cache +@@ -1446,9 +1489,23 @@ + # set the time + mod_time = 
self.curl_obj.getinfo(pycurl.INFO_FILETIME) + if mod_time != -1: +- os.utime(self.filename, (mod_time, mod_time)) ++ try: ++ os.utime(self.filename, (mod_time, mod_time)) ++ except OSError, e: ++ err = URLGrabError(16, _(\ ++ 'error setting timestamp on file %s from %s, OSError: %s') ++ % (self.filename, self.url, e)) ++ err.url = self.url ++ raise err + # re open it +- self.fo = open(self.filename, 'r') ++ try: ++ self.fo = open(self.filename, 'r') ++ except IOError, e: ++ err = URLGrabError(16, _(\ ++ 'error opening file from %s, IOError: %s') % (self.url, e)) ++ err.url = self.url ++ raise err ++ + else: + #self.fo = open(self._temp_name, 'r') + self.fo.seek(0) +@@ -1532,11 +1589,14 @@ + def _over_max_size(self, cur, max_size=None): + + if not max_size: +- max_size = self.size +- if self.opts.size: # if we set an opts size use that, no matter what +- max_size = self.opts.size ++ if not self.opts.size: ++ max_size = self.size ++ else: ++ max_size = self.opts.size ++ + if not max_size: return False # if we have None for all of the Max then this is dumb +- if cur > max_size + max_size*.10: ++ ++ if cur > int(float(max_size) * 1.10): + + msg = _("Downloaded more than max size for %s: %s > %s") \ + % (self.url, cur, max_size) +@@ -1582,9 +1642,21 @@ + self.opts.progress_obj.end(self._amount_read) + self.fo.close() + +- ++ def geturl(self): ++ """ Provide the geturl() method, used to be got from ++ urllib.addinfourl, via. 
urllib.URLopener.* """ ++ return self.url ++ + _curl_cache = pycurl.Curl() # make one and reuse it over and over and over + ++def reset_curl_obj(): ++ """To make sure curl has reread the network/dns info we force a reload""" ++ global _curl_cache ++ _curl_cache.close() ++ _curl_cache = pycurl.Curl() ++ ++ ++ + + ##################################################################### + # DEPRECATED FUNCTIONS diff --git a/python-urlgrabber.changes b/python-urlgrabber.changes index dd1e613..90738fb 100644 --- a/python-urlgrabber.changes +++ b/python-urlgrabber.changes @@ -1,3 +1,9 @@ +------------------------------------------------------------------- +Wed Feb 6 18:06:41 UTC 2013 - jmatejek@suse.com + +- Add grabber_fix.diff: Fixed timeout and other errors breaking yum + compatibility (bnc#793650) + ------------------------------------------------------------------- Mon Oct 1 09:53:26 UTC 2012 - saschpe@suse.de diff --git a/python-urlgrabber.spec b/python-urlgrabber.spec index e6410fb..723c7cf 100644 --- a/python-urlgrabber.spec +++ b/python-urlgrabber.spec @@ -1,7 +1,7 @@ # # spec file for package python-urlgrabber # -# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -24,6 +24,7 @@ Summary: A high-level cross-protocol url-grabber License: LGPL-2.1 Group: Development/Languages/Python Source: http://pypi.python.org/packages/source/u/urlgrabber/urlgrabber-%{version}.tar.gz +Patch0: grabber_fix.diff BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: python-devel BuildRequires: python-pycurl @@ -44,6 +45,7 @@ throttling, authentication, proxies and more. %prep %setup -q -n urlgrabber-%{version} sed -i "13d" urlgrabber/__init__.py # Remove wrong license header, fixes bnc#781323 +%patch0 -p1 %build python setup.py build