1
0
mirror of https://github.com/openSUSE/osc.git synced 2025-01-26 06:46:13 +01:00
github.com_openSUSE_osc/osc/fetch.py
William Brown 647c4fe34d OBS has a fault where is sends invalid md5s
This causes downloads to come from the api, generally on noarch packages.
However, in countries like australia, due to OBS' high latency, and poor
bandwidth, these faults can cause downloads to take more than an hour, compared
to using a local mirror which can take minutes. There is actually nothing
wrong with the packages it all, OBS just sends the wrong md5.

As a result, ignore the problem and complain about it, because OBS is broken
here, not osc, and this wastes a lot of time.
2022-10-06 10:50:24 +10:00

431 lines
17 KiB
Python

# Copyright (C) 2006 Novell Inc. All rights reserved.
# This program is free software; it may be used, copied, modified
# and distributed under the terms of the GNU General Public Licence,
# either version 2, or (at your option) any later version.
import os
import re
import shutil
import subprocess
import sys
import tempfile
from urllib.parse import quote_plus
from urllib.request import HTTPError
from . import checker as osc_checker
from . import conf
from . import oscerr
from .core import makeurl, dgst
from .grabber import OscFileGrabber, OscMirrorGroup
from .meter import create_text_meter
from .util import packagequery, cpio
from .util.helper import decode_it
class Fetcher:
def __init__(self, cachedir='/tmp', urllist=None,
http_debug=False, cookiejar=None, offline=False,
enable_cpio=True, modules=None, download_api_only=False):
# set up progress bar callback
self.progress_obj = None
if sys.stdout.isatty():
self.progress_obj = create_text_meter(use_pb_fallback=False)
self.cachedir = cachedir
# generic download URL lists
self.urllist = urllist or []
self.modules = modules or []
self.http_debug = http_debug
self.offline = offline
self.cpio = {}
self.enable_cpio = enable_cpio
self.download_api_only = download_api_only
self.gr = OscFileGrabber(progress_obj=self.progress_obj)
def __add_cpio(self, pac):
prpap = '%s/%s/%s/%s' % (pac.project, pac.repository, pac.repoarch, pac.repopackage)
self.cpio.setdefault(prpap, {})[pac.repofilename] = pac
def __download_cpio_archive(self, apiurl, project, repo, arch, package, **pkgs):
if not pkgs:
return
query = ['binary=%s' % quote_plus(i) for i in pkgs]
query.append('view=cpio')
for module in self.modules:
query.append('module=' + module)
try:
url = makeurl(apiurl, ['build', project, repo, arch, package], query=query)
sys.stdout.write("preparing download ...\r")
sys.stdout.flush()
with tempfile.NamedTemporaryFile(prefix='osc_build_cpio') as tmparchive:
self.gr.urlgrab(url, filename=tmparchive.name,
text='fetching packages for \'%s\'' % project)
archive = cpio.CpioRead(tmparchive.name)
archive.read()
for hdr in archive:
# XXX: we won't have an .errors file because we're using
# getbinarylist instead of the public/... route
# (which is routed to getbinaries)
# getbinaries does not support kiwi builds
if hdr.filename == b'.errors':
archive.copyin_file(hdr.filename)
raise oscerr.APIError('CPIO archive is incomplete '
'(see .errors file)')
if package == '_repository':
n = re.sub(br'\.pkg\.tar\.(zst|.z)$', b'.arch', hdr.filename)
if n.startswith(b'container:'):
n = re.sub(br'\.tar\.(zst|.z)$', b'.tar', hdr.filename)
pac = pkgs[decode_it(n.rsplit(b'.', 1)[0])]
pac.canonname = hdr.filename
else:
pac = pkgs[decode_it(n.rsplit(b'.', 1)[0])]
else:
# this is a kiwi product
pac = pkgs[decode_it(hdr.filename)]
# Extract a single file from the cpio archive
try:
fd, tmpfile = tempfile.mkstemp(prefix='osc_build_file')
archive.copyin_file(hdr.filename,
decode_it(os.path.dirname(tmpfile)),
decode_it(os.path.basename(tmpfile)))
self.move_package(tmpfile, pac.localdir, pac)
finally:
os.close(fd)
if os.path.exists(tmpfile):
os.unlink(tmpfile)
for pac in pkgs.values():
if not os.path.isfile(pac.fullfilename):
raise oscerr.APIError('failed to fetch file \'%s\': '
'missing in CPIO archive' %
pac.repofilename)
except HTTPError as e:
if e.code != 414:
raise
# query str was too large
keys = list(pkgs.keys())
if len(keys) == 1:
raise oscerr.APIError('unable to fetch cpio archive: '
'server always returns code 414')
n = int(len(pkgs) / 2)
new_pkgs = {k: pkgs[k] for k in keys[:n]}
self.__download_cpio_archive(apiurl, project, repo, arch,
package, **new_pkgs)
new_pkgs = {k: pkgs[k] for k in keys[n:]}
self.__download_cpio_archive(apiurl, project, repo, arch,
package, **new_pkgs)
def __fetch_cpio(self, apiurl):
for prpap, pkgs in self.cpio.items():
project, repo, arch, package = prpap.split('/', 3)
self.__download_cpio_archive(apiurl, project, repo, arch, package, **pkgs)
def fetch(self, pac, prefix=''):
# for use by the failure callback
self.curpac = pac
mg = OscMirrorGroup(self.gr, pac.urllist)
if self.http_debug:
print('\nURLs to try for package \'%s\':' % pac, file=sys.stderr)
print('\n'.join(pac.urllist), file=sys.stderr)
print(file=sys.stderr)
try:
with tempfile.NamedTemporaryFile(prefix='osc_build',
delete=False) as tmpfile:
mg_stat = mg.urlgrab(pac.filename, filename=tmpfile.name,
text='%s(%s) %s' % (prefix, pac.project, pac.filename))
if mg_stat:
self.move_package(tmpfile.name, pac.localdir, pac)
if not mg_stat:
if self.enable_cpio:
print('%s/%s: attempting download from api, since not found'
% (pac.project, pac.name))
self.__add_cpio(pac)
return
print()
print('Error: Failed to retrieve %s from the following locations '
'(in order):' % pac.filename, file=sys.stderr)
print('\n'.join(pac.urllist), file=sys.stderr)
sys.exit(1)
finally:
if os.path.exists(tmpfile.name):
os.unlink(tmpfile.name)
def move_package(self, tmpfile, destdir, pac_obj=None):
canonname = None
if pac_obj and pac_obj.name.startswith('container:'):
canonname = pac_obj.canonname
if canonname is None:
pkgq = packagequery.PackageQuery.query(tmpfile, extra_rpmtags=(1044, 1051, 1052))
if pkgq:
canonname = pkgq.canonname()
else:
if pac_obj is None:
print('Unsupported file type: ', tmpfile, file=sys.stderr)
sys.exit(1)
canonname = pac_obj.binary
decoded_canonname = decode_it(canonname)
if b'/' in canonname or '/' in decoded_canonname:
raise oscerr.OscIOError(None, 'canonname contains a slash')
fullfilename = os.path.join(destdir, decoded_canonname)
if pac_obj is not None:
pac_obj.canonname = canonname
pac_obj.fullfilename = fullfilename
shutil.move(tmpfile, fullfilename)
os.chmod(fullfilename, 0o644)
def dirSetup(self, pac):
dir = os.path.join(self.cachedir, pac.localdir)
if not os.path.exists(dir):
try:
os.makedirs(dir, mode=0o755)
except OSError as e:
print('packagecachedir is not writable for you?', file=sys.stderr)
print(e, file=sys.stderr)
sys.exit(1)
def _build_urllist(self, buildinfo, pac):
if self.download_api_only:
return []
urllist = self.urllist
key = '%s/%s' % (pac.project, pac.repository)
project_repo_url = buildinfo.urls.get(key)
if project_repo_url is not None:
urllist = [project_repo_url]
return urllist
def run(self, buildinfo):
cached = 0
all = len(buildinfo.deps)
for i in buildinfo.deps:
urllist = self._build_urllist(buildinfo, i)
i.makeurls(self.cachedir, urllist)
# find container extension by looking in the cache
if i.name.startswith('container:') and i.fullfilename.endswith('.tar.xz'):
for ext in ['.tar.xz', '.tar.gz', '.tar']:
if os.path.exists(i.fullfilename[:-7] + ext):
i.canonname = i.canonname[:-7] + ext
i.makeurls(self.cachedir, urllist)
if os.path.exists(i.fullfilename):
cached += 1
if not i.name.startswith('container:') and i.pacsuffix != 'rpm':
continue
if i.hdrmd5:
if i.name.startswith('container:'):
hdrmd5 = dgst(i.fullfilename)
else:
hdrmd5 = packagequery.PackageQuery.queryhdrmd5(i.fullfilename)
if not hdrmd5 or hdrmd5 != i.hdrmd5:
os.unlink(i.fullfilename)
cached -= 1
miss = 0
needed = all - cached
if all:
miss = 100.0 * needed / all
print("%.1f%% cache miss. %d/%d dependencies cached.\n" % (miss, cached, all))
done = 1
for i in buildinfo.deps:
if not os.path.exists(i.fullfilename):
if self.offline:
raise oscerr.OscIOError(None,
'Missing \'%s\' in cache: '
'--offline not possible.' %
i.fullfilename)
self.dirSetup(i)
try:
# if there isn't a progress bar, there is no output at all
prefix = ''
if not self.progress_obj:
print('%d/%d (%s) %s' % (done, needed, i.project, i.filename))
else:
prefix = '[%d/%d] ' % (done, needed)
self.fetch(i, prefix=prefix)
if not os.path.isfile(i.fullfilename):
# if the file wasn't downloaded and cannot be found on disk,
# mark it for downloading from the API
self.__add_cpio(i)
else:
# if the checksum of the downloaded package doesn't match,
# delete it and mark it for downloading from the API
#
# wbrown 2022 - is there a reason to keep these md5's at all? md5 is
# broken from a security POV so these aren't a trusted source for validation
# of the file content. They are often incorrect forcing download via the API
# which for anyone outside the EU is excruciating. And when they are ignored
# builds work and progress anyway? So what do they even do? What are they
# for? They should just be removed.
hdrmd5 = packagequery.PackageQuery.queryhdrmd5(i.fullfilename)
if not hdrmd5 or hdrmd5 != i.hdrmd5:
print('%s/%s: allowing invalid file, probably an OBS bug - hdrmd5 did not match - %s != %s'
% (i.project, i.name, hdrmd5, i.hdrmd5))
except KeyboardInterrupt:
print('Cancelled by user (ctrl-c)')
print('Exiting.')
sys.exit(0)
done += 1
self.__fetch_cpio(buildinfo.apiurl)
prjs = list(buildinfo.projects.keys())
for i in prjs:
dest = "%s/%s" % (self.cachedir, i)
if not os.path.exists(dest):
os.makedirs(dest, mode=0o755)
dest += '/_pubkey'
url = makeurl(buildinfo.apiurl, ['source', i, '_pubkey'])
try_parent = False
try:
if self.offline and not os.path.exists(dest):
# may need to try parent
try_parent = True
elif not self.offline:
OscFileGrabber().urlgrab(url, dest)
# not that many keys usually
if i not in buildinfo.prjkeys and not try_parent:
buildinfo.keys.append(dest)
buildinfo.prjkeys.append(i)
except KeyboardInterrupt:
print('Cancelled by user (ctrl-c)')
print('Exiting.')
if os.path.exists(dest):
os.unlink(dest)
sys.exit(0)
except HTTPError as e:
# Not found is okay, let's go to the next project
if e.code != 404:
print("Invalid answer from server", e, file=sys.stderr)
sys.exit(1)
try_parent = True
if try_parent:
if self.http_debug:
print("can't fetch key for %s" % (i), file=sys.stderr)
print("url: %s" % url, file=sys.stderr)
if os.path.exists(dest):
os.unlink(dest)
l = i.rsplit(':', 1)
# try key from parent project
if len(l) > 1 and l[1] and not l[0] in buildinfo.projects:
prjs.append(l[0])
def verify_pacs_old(pac_list):
"""Take a list of rpm filenames and run rpm -K on them.
In case of failure, exit.
Check all packages in one go, since this takes only 6 seconds on my Athlon 700
instead of 20 when calling 'rpm -K' for each of them.
"""
if not pac_list:
return
# don't care about the return value because we check the
# output anyway, and rpm always writes to stdout.
# save locale first (we rely on English rpm output here)
saved_LC_ALL = os.environ.get('LC_ALL')
os.environ['LC_ALL'] = 'en_EN'
o = subprocess.Popen(['rpm', '-K'] + pac_list, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, close_fds=True).stdout
# restore locale
if saved_LC_ALL:
os.environ['LC_ALL'] = saved_LC_ALL
else:
os.environ.pop('LC_ALL')
for line in o.readlines():
if 'OK' not in line:
print()
print('The following package could not be verified:', file=sys.stderr)
print(line, file=sys.stderr)
sys.exit(1)
if 'NOT OK' in line:
print()
print('The following package could not be verified:', file=sys.stderr)
print(line, file=sys.stderr)
if 'MISSING KEYS' in line:
missing_key = line.split('#')[-1].split(')')[0]
print("""
- If the key (%(name)s) is missing, install it first.
For example, do the following:
osc signkey PROJECT > file
and, as root:
rpm --import %(dir)s/keyfile-%(name)s
Then, just start the build again.
- If you do not trust the packages, you should configure osc build for XEN or KVM
- You may use --no-verify to skip the verification (which is a risk for your system).
""" % {'name': missing_key,
'dir': os.path.expanduser('~')}, file=sys.stderr)
else:
print("""
- If the signature is wrong, you may try deleting the package manually
and re-run this program, so it is fetched again.
""", file=sys.stderr)
sys.exit(1)
def verify_pacs(bi):
"""Take a list of rpm filenames and verify their signatures.
In case of failure, exit.
"""
pac_list = [i.fullfilename for i in bi.deps]
if conf.config['builtin_signature_check'] is not True:
return verify_pacs_old(pac_list)
if not pac_list:
return
if not bi.keys:
raise oscerr.APIError("can't verify packages due to lack of GPG keys")
print("using keys from", ', '.join(bi.prjkeys))
failed = False
checker = osc_checker.Checker()
try:
checker.readkeys(bi.keys)
for pkg in pac_list:
try:
checker.check(pkg)
except Exception as e:
failed = True
print(pkg, ':', e)
except:
checker.cleanup()
raise
if failed:
checker.cleanup()
sys.exit(1)
checker.cleanup()
# vim: sw=4 et