1
0
mirror of https://github.com/openSUSE/osc.git synced 2024-12-25 01:16:14 +01:00
github.com_openSUSE_osc/osc/fetch.py

434 lines
17 KiB
Python

# Copyright (C) 2006 Novell Inc. All rights reserved.
# This program is free software; it may be used, copied, modified
# and distributed under the terms of the GNU General Public Licence,
# either version 2, or (at your option) any later version.
import os
import re
import shutil
import subprocess
import sys
import tempfile
from urllib.parse import quote_plus
from urllib.request import HTTPError
from . import checker as osc_checker
from . import conf
from . import oscerr
from .core import makeurl, dgst
from .grabber import OscFileGrabber, OscMirrorGroup
from .meter import create_text_meter
from .util import packagequery, cpio
from .util.helper import decode_it
class Fetcher:
    """Download build dependencies into a local package cache.

    Packages are first requested from the mirror URLs attached to each
    package object.  Whatever cannot be obtained that way is queued and
    later fetched in bulk from the API as a CPIO archive.  Finally the
    ``_pubkey`` file of every involved project is stored in the cache.
    """

    def __init__(self, cachedir='/tmp', urllist=None,
                 http_debug=False, cookiejar=None, offline=False,
                 enable_cpio=True, modules=None, download_api_only=False):
        """Set up the fetcher.

        :param cachedir: root directory of the local package cache
        :param urllist: generic download URLs used when the buildinfo has
            no URL for a package's project/repository
        :param http_debug: print the candidate URLs and key-fetch failures
            to stderr
        :param cookiejar: not used by this class; kept so existing callers
            that pass it keep working
        :param offline: never download; ``run`` raises if a file is missing
        :param enable_cpio: on mirror failure, queue the package for the
            API (CPIO) download instead of exiting
        :param modules: values appended as ``module=`` query parameters to
            the CPIO download request
        :param download_api_only: hand out an empty mirror list for every
            package
        """
        # set up progress bar callback
        self.progress_obj = None
        if sys.stdout.isatty():
            self.progress_obj = create_text_meter(use_pb_fallback=False)

        self.cachedir = cachedir
        # generic download URL lists
        self.urllist = urllist or []
        self.modules = modules or []
        self.http_debug = http_debug
        self.offline = offline
        # "project/repo/arch/package" -> {repofilename: pac}
        self.cpio = {}
        self.enable_cpio = enable_cpio
        self.download_api_only = download_api_only

        self.gr = OscFileGrabber(progress_obj=self.progress_obj)

    def __add_cpio(self, pac):
        """Queue *pac* for the bulk CPIO download done by ``__fetch_cpio``."""
        prpap = '%s/%s/%s/%s' % (pac.project, pac.repository, pac.repoarch, pac.repopackage)
        self.cpio.setdefault(prpap, {})[pac.repofilename] = pac

    def __download_cpio_archive(self, apiurl, project, repo, arch, package, **pkgs):
        """Fetch *pkgs* as one CPIO archive and move each member into the cache.

        :param pkgs: mapping of binary name (as queued by ``__add_cpio``)
            to the package object it belongs to
        :raises oscerr.APIError: if the archive contains a ``.errors``
            member, if a requested file is still missing afterwards, or if
            the server answers 414 even for a single package
        :raises HTTPError: for any HTTP failure other than 414
        """
        if not pkgs:
            return

        query = ['binary=%s' % quote_plus(i) for i in pkgs]
        query.append('view=cpio')
        for module in self.modules:
            query.append('module=' + module)

        try:
            url = makeurl(apiurl, ['build', project, repo, arch, package], query=query)
            sys.stdout.write("preparing download ...\r")
            sys.stdout.flush()
            with tempfile.NamedTemporaryFile(prefix='osc_build_cpio') as tmparchive:
                self.gr.urlgrab(url, filename=tmparchive.name,
                                text='fetching packages for \'%s\'' % project)
                archive = cpio.CpioRead(tmparchive.name)
                archive.read()
                for hdr in archive:
                    # XXX: we won't have an .errors file because we're using
                    # getbinarylist instead of the public/... route
                    # (which is routed to getbinaries)
                    # getbinaries does not support kiwi builds
                    if hdr.filename == b'.errors':
                        archive.copyin_file(hdr.filename)
                        raise oscerr.APIError('CPIO archive is incomplete '
                                              '(see .errors file)')

                    if package == '_repository':
                        member = re.sub(br'\.pkg\.tar\.(zst|.z)$', b'.arch', hdr.filename)
                        is_container = member.startswith(b'container:')
                        if is_container:
                            member = re.sub(br'\.tar\.(zst|.z)$', b'.tar', hdr.filename)
                        # the lookup key is the member name without its last suffix
                        pac = pkgs[decode_it(member.rsplit(b'.', 1)[0])]
                        if is_container:
                            pac.canonname = hdr.filename
                    else:
                        # this is a kiwi product
                        pac = pkgs[decode_it(hdr.filename)]

                    # Extract a single file from the cpio archive
                    fd = None
                    tmpfile = None
                    try:
                        fd, tmpfile = tempfile.mkstemp(prefix='osc_build_file')
                        archive.copyin_file(hdr.filename,
                                            decode_it(os.path.dirname(tmpfile)),
                                            decode_it(os.path.basename(tmpfile)))
                        self.move_package(tmpfile, pac.localdir, pac)
                    finally:
                        if fd is not None:
                            os.close(fd)
                        # move_package normally moves the file away; only a
                        # failed extraction leaves it behind
                        if tmpfile is not None and os.path.exists(tmpfile):
                            os.unlink(tmpfile)

                for pac in pkgs.values():
                    if not os.path.isfile(pac.fullfilename):
                        raise oscerr.APIError('failed to fetch file \'%s\': '
                                              'missing in CPIO archive' %
                                              pac.repofilename)
        except HTTPError as e:
            if e.code != 414:
                raise
            # query str was too large: split the request in two halves
            keys = list(pkgs.keys())
            if len(keys) == 1:
                raise oscerr.APIError('unable to fetch cpio archive: '
                                      'server always returns code 414')
            half = len(keys) // 2
            for chunk in (keys[:half], keys[half:]):
                new_pkgs = {k: pkgs[k] for k in chunk}
                self.__download_cpio_archive(apiurl, project, repo, arch,
                                             package, **new_pkgs)

    def __fetch_cpio(self, apiurl):
        """Download everything queued by ``__add_cpio``, one archive per prpap."""
        for prpap, pkgs in self.cpio.items():
            project, repo, arch, package = prpap.split('/', 3)
            self.__download_cpio_archive(apiurl, project, repo, arch, package, **pkgs)

    def fetch(self, pac, prefix=''):
        """Download *pac* from its mirror URLs into the cache.

        If no mirror delivers the file, the package is queued for the CPIO
        download when ``enable_cpio`` is set; otherwise the tried URLs are
        printed and the process exits with status 1.

        :param prefix: text put in front of the progress line
        """
        # for use by the failure callback
        self.curpac = pac

        mg = OscMirrorGroup(self.gr, pac.urllist)

        if self.http_debug:
            print('\nURLs to try for package \'%s\':' % pac, file=sys.stderr)
            print('\n'.join(pac.urllist), file=sys.stderr)
            print(file=sys.stderr)

        # Kept in a local so the cleanup below cannot hit an unbound name
        # (and hide the real error) when creating the temp file fails.
        tmpname = None
        try:
            with tempfile.NamedTemporaryFile(prefix='osc_build',
                                             delete=False) as tmpfile:
                tmpname = tmpfile.name
                mg_stat = mg.urlgrab(pac.filename, filename=tmpname,
                                     text='%s(%s) %s' % (prefix, pac.project, pac.filename))
                if mg_stat:
                    self.move_package(tmpname, pac.localdir, pac)

            if not mg_stat:
                if self.enable_cpio:
                    print('%s/%s: attempting download from api, since not found'
                          % (pac.project, pac.name))
                    self.__add_cpio(pac)
                    return
                print()
                print('Error: Failed to retrieve %s from the following locations '
                      '(in order):' % pac.filename, file=sys.stderr)
                print('\n'.join(pac.urllist), file=sys.stderr)
                sys.exit(1)
        finally:
            if tmpname is not None and os.path.exists(tmpname):
                os.unlink(tmpname)

    def move_package(self, tmpfile, destdir, pac_obj=None):
        """Move the downloaded *tmpfile* to ``destdir/<canonical name>``.

        The canonical name is taken from *pac_obj* for ``container:``
        packages, otherwise from querying the file itself, falling back to
        ``pac_obj.binary``.  When *pac_obj* is given, its ``canonname`` and
        ``fullfilename`` attributes are updated.

        Exits with status 1 if the file type is unsupported and no
        *pac_obj* was given.

        :raises oscerr.OscIOError: if the canonical name contains a slash
        """
        canonname = None
        if pac_obj and pac_obj.name.startswith('container:'):
            canonname = pac_obj.canonname
        if canonname is None:
            pkgq = packagequery.PackageQuery.query(tmpfile, extra_rpmtags=(1044, 1051, 1052))
            if pkgq:
                canonname = pkgq.canonname()
            else:
                if pac_obj is None:
                    print('Unsupported file type: ', tmpfile, file=sys.stderr)
                    sys.exit(1)
                canonname = pac_obj.binary

        decoded_canonname = decode_it(canonname)
        # canonname may be bytes or str; "b'/' in <str>" raises TypeError,
        # so the raw-bytes check is only applied to bytes input.
        raw_has_slash = isinstance(canonname, bytes) and b'/' in canonname
        if raw_has_slash or '/' in decoded_canonname:
            raise oscerr.OscIOError(None, 'canonname contains a slash')

        fullfilename = os.path.join(destdir, decoded_canonname)
        if pac_obj is not None:
            pac_obj.canonname = canonname
            pac_obj.fullfilename = fullfilename
        shutil.move(tmpfile, fullfilename)
        os.chmod(fullfilename, 0o644)

    def dirSetup(self, pac):
        """Create the cache directory for *pac*; exit with status 1 on failure."""
        target = os.path.join(self.cachedir, pac.localdir)
        if not os.path.exists(target):
            try:
                os.makedirs(target, mode=0o755)
            except OSError as e:
                print('packagecachedir is not writable for you?', file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

    def _build_urllist(self, buildinfo, pac):
        """Return the mirror URLs to use for *pac*.

        A URL registered in ``buildinfo.urls`` under "project/repository"
        replaces the generic list; with ``download_api_only`` the result is
        always empty.
        """
        if self.download_api_only:
            return []
        urllist = self.urllist
        key = '%s/%s' % (pac.project, pac.repository)
        project_repo_url = buildinfo.urls.get(key)
        if project_repo_url is not None:
            urllist = [project_repo_url]
        return urllist

    def _count_cached(self, buildinfo):
        """Prepare every dependency's URLs and return how many are usable from cache.

        Cached rpm and container files whose hdrmd5 does not match the
        buildinfo are deleted and not counted.
        """
        cached = 0
        for dep in buildinfo.deps:
            urllist = self._build_urllist(buildinfo, dep)
            dep.makeurls(self.cachedir, urllist)
            # find container extension by looking in the cache
            if dep.name.startswith('container:') and dep.fullfilename.endswith('.tar.xz'):
                for ext in ['.tar.xz', '.tar.gz', '.tar']:
                    if os.path.exists(dep.fullfilename[:-7] + ext):
                        dep.canonname = dep.canonname[:-7] + ext
                        dep.makeurls(self.cachedir, urllist)

            if not os.path.exists(dep.fullfilename):
                continue
            cached += 1
            is_container = dep.name.startswith('container:')
            if not is_container and dep.pacsuffix != 'rpm':
                continue
            if dep.hdrmd5:
                if is_container:
                    hdrmd5 = dgst(dep.fullfilename)
                else:
                    hdrmd5 = packagequery.PackageQuery.queryhdrmd5(dep.fullfilename)
                if not hdrmd5 or hdrmd5 != dep.hdrmd5:
                    os.unlink(dep.fullfilename)
                    cached -= 1
        return cached

    def _fetch_missing(self, buildinfo, needed):
        """Download every dependency that is not in the cache yet.

        :param needed: number of missing files, used for progress output
        :raises oscerr.OscIOError: if a file is missing in offline mode
        """
        done = 1
        for dep in buildinfo.deps:
            if os.path.exists(dep.fullfilename):
                continue
            if self.offline:
                raise oscerr.OscIOError(None,
                                        'Missing \'%s\' in cache: '
                                        '--offline not possible.' %
                                        dep.fullfilename)
            self.dirSetup(dep)
            try:
                # if there isn't a progress bar, there is no output at all
                prefix = ''
                if not self.progress_obj:
                    print('%d/%d (%s) %s' % (done, needed, dep.project, dep.filename))
                else:
                    prefix = '[%d/%d] ' % (done, needed)
                self.fetch(dep, prefix=prefix)

                if not os.path.isfile(dep.fullfilename):
                    # if the file wasn't downloaded and cannot be found on disk,
                    # mark it for downloading from the API
                    self.__add_cpio(dep)
                else:
                    # A checksum mismatch on the downloaded package is only
                    # reported here; the file is kept and NOT re-queued for
                    # the API download.
                    #
                    # wbrown 2022 - is there a reason to keep these md5's at all? md5 is
                    # broken from a security POV so these aren't a trusted source for validation
                    # of the file content. They are often incorrect forcing download via the API
                    # which for anyone outside the EU is excruciating. And when they are ignored
                    # builds work and progress anyway? So what do they even do? What are they
                    # for? They should just be removed.
                    hdrmd5 = packagequery.PackageQuery.queryhdrmd5(dep.fullfilename)
                    if not hdrmd5 or hdrmd5 != dep.hdrmd5:
                        print('%s/%s: allowing invalid file, probably an OBS bug - hdrmd5 did not match - %s != %s'
                              % (dep.project, dep.name, hdrmd5, dep.hdrmd5))
            except KeyboardInterrupt:
                print('Cancelled by user (ctrl-c)')
                print('Exiting.')
                sys.exit(0)
            done += 1

    def _fetch_pubkeys(self, buildinfo):
        """Store each project's ``_pubkey`` in the cache and register it.

        Successfully obtained keys are appended to ``buildinfo.keys`` and
        their projects to ``buildinfo.prjkeys``.  If a project has no key
        (HTTP 404, or missing from the cache in offline mode) its parent
        project is tried instead.
        """
        prjs = list(buildinfo.projects.keys())
        # NOTE: prjs is deliberately extended while being iterated; parent
        # projects appended below are processed by this same loop.
        for prj in prjs:
            dest = "%s/%s" % (self.cachedir, prj)
            if not os.path.exists(dest):
                os.makedirs(dest, mode=0o755)
            dest += '/_pubkey'

            url = makeurl(buildinfo.apiurl, ['source', prj, '_pubkey'])
            try_parent = False
            try:
                if self.offline and not os.path.exists(dest):
                    # may need to try parent
                    try_parent = True
                elif not self.offline:
                    OscFileGrabber().urlgrab(url, dest)
                # not that many keys usually
                if prj not in buildinfo.prjkeys and not try_parent:
                    buildinfo.keys.append(dest)
                    buildinfo.prjkeys.append(prj)
            except KeyboardInterrupt:
                print('Cancelled by user (ctrl-c)')
                print('Exiting.')
                if os.path.exists(dest):
                    os.unlink(dest)
                sys.exit(0)
            except HTTPError as e:
                # Not found is okay, let's go to the next project
                if e.code != 404:
                    print("Invalid answer from server", e, file=sys.stderr)
                    sys.exit(1)
                try_parent = True

            if not try_parent:
                continue

            if self.http_debug:
                print("can't fetch key for %s" % (prj), file=sys.stderr)
                print("url: %s" % url, file=sys.stderr)

            if os.path.exists(dest):
                os.unlink(dest)

            parts = prj.rsplit(':', 1)
            # try key from parent project
            if len(parts) > 1 and parts[1] and parts[0] not in buildinfo.projects:
                prjs.append(parts[0])

    def run(self, buildinfo):
        """Make all dependencies of *buildinfo* available in the cache.

        Reports the cache hit rate, downloads missing files from the
        mirrors, fetches the remainder from the API and finally retrieves
        the project public keys.
        """
        cached = self._count_cached(buildinfo)
        total = len(buildinfo.deps)
        needed = total - cached
        miss = 0
        if total:
            miss = 100.0 * needed / total
        print("%.1f%% cache miss. %d/%d dependencies cached.\n" % (miss, cached, total))

        self._fetch_missing(buildinfo, needed)
        self.__fetch_cpio(buildinfo.apiurl)
        self._fetch_pubkeys(buildinfo)
def verify_pacs_old(pac_list):
    """Take a list of rpm filenames and run rpm -K on them.

    In case of failure, exit.

    Check all packages in one go, since this takes only 6 seconds on my Athlon 700
    instead of 20 when calling 'rpm -K' for each of them.

    :param pac_list: list of rpm file paths; nothing is done if it is empty
    """
    if not pac_list:
        return

    # don't care about the return value because we check the
    # output anyway, and rpm always writes to stdout.

    # We rely on English rpm output here.  The locale is overridden for the
    # child process only, so this process's environment is never modified.
    env = dict(os.environ, LC_ALL='C')
    with subprocess.Popen(['rpm', '-K'] + pac_list, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT, close_fds=True,
                          env=env) as proc:
        raw_output = proc.communicate()[0]

    # The pipe yields bytes; decode before matching against str markers.
    # Line endings are kept so the printed lines look as before.
    output = raw_output.decode('utf-8', errors='replace')

    for line in output.splitlines(keepends=True):
        if 'OK' not in line:
            print()
            print('The following package could not be verified:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)

        if 'NOT OK' in line:
            print()
            print('The following package could not be verified:', file=sys.stderr)
            print(line, file=sys.stderr)

            if 'MISSING KEYS' in line:
                # the key id is the text between the last '#' and the next ')'
                missing_key = line.split('#')[-1].split(')')[0]

                print("""
- If the key (%(name)s) is missing, install it first.
For example, do the following:
osc signkey PROJECT > file
and, as root:
rpm --import %(dir)s/keyfile-%(name)s
Then, just start the build again.
- If you do not trust the packages, you should configure osc build for XEN or KVM
- You may use --no-verify to skip the verification (which is a risk for your system).
""" % {'name': missing_key,
       'dir': os.path.expanduser('~')}, file=sys.stderr)

            else:
                print("""
- If the signature is wrong, you may try deleting the package manually
and re-run this program, so it is fetched again.
""", file=sys.stderr)

            sys.exit(1)
def verify_pacs(bi):
    """Take a list of rpm filenames and verify their signatures.

    In case of failure, exit.

    When the ``builtin_signature_check`` config option is not ``True`` the
    check is delegated to ``verify_pacs_old``.

    :param bi: buildinfo object; its ``deps``, ``keys`` and ``prjkeys``
        attributes are read
    :raises oscerr.APIError: if there are packages to verify but no keys
    """
    pac_list = [i.fullfilename for i in bi.deps]
    if conf.config['builtin_signature_check'] is not True:
        return verify_pacs_old(pac_list)

    if not pac_list:
        return

    if not bi.keys:
        raise oscerr.APIError("can't verify packages due to lack of GPG keys")

    print("using keys from", ', '.join(bi.prjkeys))

    failed = False
    checker = osc_checker.Checker()
    # cleanup must run on every path: success, verification failure and
    # any unexpected exception (including KeyboardInterrupt)
    try:
        checker.readkeys(bi.keys)
        for pkg in pac_list:
            try:
                checker.check(pkg)
            except Exception as e:
                # report every bad package before giving up
                failed = True
                print(pkg, ':', e)
    finally:
        checker.cleanup()

    if failed:
        sys.exit(1)
# vim: sw=4 et