1
0
mirror of https://github.com/openSUSE/osc.git synced 2025-02-04 10:36:17 +01:00

Support an arbitrary sized file in core.http_request

The old code only supports a file whose size is less than or equal
to INT_MAX (due to a reasonable(!) limit in M2Crypto). The actual
issue is in core.http_request which mmap(...)s the file, wraps it
into a memoryview/buffer and then passes the memoryview/buffer to
urlopen. Eventually, the whole memoryview/buffer is read into memory
(see m2_PyObject_GetBufferInt). If the file is too large (> INT_MAX),
m2_PyObject_GetBufferInt raises a ValueError (which is perfectly
fine!).
Reading a whole file into memory is completely insane. In order to
avoid this, we now simply pass a file-like object to urlopen (more
precisely, the file-like object is associated with the Request
instance that is passed to urlopen). The advantage is that the
file-like object is processed in chunks of 8192 bytes (see
http.client.HTTPConnection), that is, only 8192 bytes are read into
memory at a time (instead of the whole file).

There are two pitfalls when passing a file-like object to urlopen:
* By default, a chunked Transfer-Encoding is applied. It seems that
  some servers (like api.o.o) do not like this (PUTing a file with
  a chunked Transfer-Encoding to api.o.o results in status 400). In
  order to avoid a chunked Transfer-Encoding, we explicitly set a
  Content-Length header (we also do this in the non-file case (just
  for the sake of completeness)).
* If the request fails with status 401, it is retried with an
  appropriate Authorization header. When retrying the request, the
  file's offset has to be repositioned to the beginning of the file
  (otherwise, a 0-length body is sent which most likely does not
  match the Content-Length header).

Note: core.http_request's "data" and "file" parameters are now mutually
exclusive because specifying both makes no sense (only one of them
is considered) and it simplifies the implementation a bit.

Fixes: #202 ("osc user authentification seems to be broken with last
commit")
Fixes: #304 ("osc ci - cannot handle more than 2 GB file uploads")
This commit is contained in:
Marcus Huewe 2021-04-09 15:57:00 +02:00
parent fc5470a152
commit c932f95d46
2 changed files with 56 additions and 45 deletions

View File

@ -478,6 +478,25 @@ def get_apiurl_usr(apiurl):
def _build_opener(apiurl):
from osc.core import __version__
global config
class OscHTTPBasicAuthHandler(HTTPBasicAuthHandler, object):
    """Basic auth handler that rewinds a file-like request body on retry.

    When a request is answered with a 401 it is issued a second time with
    an Authorization header. If the body is a file-like object, its offset
    has to be reset first, otherwise the retry would send an empty body.
    """
    # python2: inherit from object in order to make it a new-style class
    # (HTTPBasicAuthHandler is not a new-style class)

    def _rewind_request(self, req):
        """Seek req.data back to offset 0 if it offers a seek() method.

        Bodies without a seek attribute (bytes, None, ...) are left
        untouched.
        """
        if hasattr(req.data, 'seek'):
            # if the request is issued again (this time with an
            # Authorization header), the file's offset has to be
            # repositioned to the beginning of the file (otherwise,
            # a 0-length body is sent which most likely does not match
            # the Content-Length header (if present))
            req.data.seek(0)

    def retry_http_basic_auth(self, host, req, realm):
        """Rewind the request body, then delegate to the base class retry.

        Returns whatever HTTPBasicAuthHandler.retry_http_basic_auth
        returns for the given host, request and realm.
        """
        self._rewind_request(req)
        # Name the class explicitly: super(self.__class__, self) resolves
        # to this very method again as soon as a subclass inherits it,
        # which ends in unbounded recursion.
        return super(OscHTTPBasicAuthHandler, self).retry_http_basic_auth(
            host, req, realm)
if 'last_opener' not in _build_opener.__dict__:
_build_opener.last_opener = (None, None)
if apiurl == _build_opener.last_opener[0]:
@ -491,10 +510,10 @@ def _build_opener(apiurl):
# read proxies from env
proxyhandler = ProxyHandler()
authhandler_class = OscHTTPBasicAuthHandler
# workaround for http://bugs.python.org/issue9639
authhandler_class = HTTPBasicAuthHandler
if sys.version_info >= (2, 6, 6) and sys.version_info < (2, 7, 9):
class OscHTTPBasicAuthHandler(HTTPBasicAuthHandler):
class OscHTTPBasicAuthHandlerCompat(OscHTTPBasicAuthHandler):
# The following two functions were backported from upstream 2.7.
def http_error_auth_reqed(self, authreq, host, req, headers):
authreq = headers.get(authreq, None)
@ -510,6 +529,7 @@ def _build_opener(apiurl):
return self.retry_http_basic_auth(host, req, realm)
def retry_http_basic_auth(self, host, req, realm):
self._rewind_request(req)
user, pw = self.passwd.find_user_password(realm, host)
if pw is not None:
raw = "%s:%s" % (user, pw)
@ -521,7 +541,7 @@ def _build_opener(apiurl):
else:
return None
authhandler_class = OscHTTPBasicAuthHandler
authhandler_class = OscHTTPBasicAuthHandlerCompat
options = config['api_host_options'][apiurl]
# with None as first argument, it will always use this username/password

View File

@ -3340,22 +3340,28 @@ def makeurl(baseurl, l, query=[]):
def http_request(method, url, headers={}, data=None, file=None):
"""wrapper around urllib2.urlopen for error handling,
and to support additional (PUT, DELETE) methods"""
def create_memoryview(obj):
if sys.version_info < (2, 7, 99):
# obj might be a mmap and python 2.7's mmap does not
# behave like a bytearray (a bytearray in turn can be used
# to create the memoryview). For now simply return a buffer
return buffer(obj)
return memoryview(obj)
class DataContext:
"""Wrap a data value (or None) in a context manager."""
filefd = None
def __init__(self, data):
self._data = data
def __enter__(self):
return self._data
def __exit__(self, exc_type, exc_val, exc_tb):
return None
if file is not None and data is not None:
raise RuntimeError('file and data are mutually exclusive')
if conf.config['http_debug']:
print('\n\n--', method, url, file=sys.stderr)
if method == 'POST' and not file and not data:
# adding data to an urllib2 request transforms it into a POST
data = ''
data = b''
req = URLRequest(url)
api_host_options = {}
@ -3379,43 +3385,28 @@ def http_request(method, url, headers={}, data=None, file=None):
print(headers[i])
req.add_header(i, headers[i])
if file and not data:
size = os.path.getsize(file)
if size < 1024*512:
data = open(file, 'rb').read()
else:
import mmap
filefd = open(file, 'rb')
try:
if sys.platform[:3] != 'win':
data = mmap.mmap(filefd.fileno(), os.path.getsize(file), mmap.MAP_SHARED, mmap.PROT_READ)
else:
data = mmap.mmap(filefd.fileno(), os.path.getsize(file))
data = create_memoryview(data)
except EnvironmentError as e:
if e.errno == 19:
sys.exit('\n\n%s\nThe file \'%s\' could not be memory mapped. It is ' \
'\non a filesystem which does not support this.' % (e, file))
elif hasattr(e, 'winerror') and e.winerror == 5:
# falling back to the default io
data = open(file, 'rb').read()
else:
raise
if conf.config['debug']: print(method, url, file=sys.stderr)
try:
content_length = None
if data is not None:
if isinstance(data, str):
data = bytes(data, "utf-8")
fd = urlopen(req, data=data)
data = data.encode('utf-8')
content_length = len(data)
elif file is not None:
content_length = os.path.getsize(file)
finally:
if hasattr(conf.cookiejar, 'save'):
conf.cookiejar.save(ignore_discard=True)
if filefd: filefd.close()
return fd
with (open(file, 'rb') if file is not None else DataContext(data)) as d:
req.data = d
if content_length is not None:
# do this after setting req.data because the corresponding setter
# kills an existing Content-Length header (see urllib.Request class
# (python38))
req.add_header('Content-Length', str(content_length))
try:
return urlopen(req)
finally:
if hasattr(conf.cookiejar, 'save'):
conf.cookiejar.save(ignore_discard=True)
def http_GET(*args, **kwargs): return http_request('GET', *args, **kwargs)