# Copyright (C) 2006 Novell Inc.  All rights reserved.
# This program is free software; it may be used, copied, modified
# and distributed under the terms of the GNU General Public Licence,
# either version 2, or (at your option) any later version.


import os
import re
import shutil
import subprocess
import sys
import tempfile
from urllib.parse import quote_plus
from urllib.request import HTTPError

from . import checker as osc_checker
from . import conf
from . import oscerr
from .core import makeurl, dgst
from .grabber import OscFileGrabber, OscMirrorGroup
from .meter import create_text_meter
from .util import packagequery, cpio
from .util.helper import decode_it


class Fetcher:
    """Download build dependencies into a local package cache.

    Each dependency is first tried from its own URL list through an
    ``OscMirrorGroup``.  Dependencies that cannot be fetched that way are
    queued (see ``__add_cpio``) and later requested from the API as cpio
    archives, provided ``enable_cpio`` is true.
    """

    def __init__(self, cachedir='/tmp', urllist=None,
                 http_debug=False, cookiejar=None, offline=False,
                 enable_cpio=True, modules=None, download_api_only=False):
        """Initialise the fetcher.

        cachedir          -- root directory of the package cache
        urllist           -- generic download URLs, used when the buildinfo
                             has no URL for a dependency's project/repository
        http_debug        -- print candidate URLs and key-fetch failures to
                             stderr
        cookiejar         -- accepted but not used within this class
        offline           -- never download; ``run`` raises if a dependency
                             is missing from the cache
        enable_cpio       -- fall back to a cpio download from the API when
                             the URL download fails
        modules           -- values appended as ``module=`` query parameters
                             to every cpio request
        download_api_only -- make ``_build_urllist`` return an empty list
        """
        # set up progress bar callback
        self.progress_obj = None
        if sys.stdout.isatty():
            self.progress_obj = create_text_meter(use_pb_fallback=False)

        self.cachedir = cachedir
        # generic download URL lists
        self.urllist = urllist or []
        self.modules = modules or []
        self.http_debug = http_debug
        self.offline = offline
        # maps "project/repo/arch/package" -> {repofilename: pac}
        self.cpio = {}
        self.enable_cpio = enable_cpio
        self.download_api_only = download_api_only

        self.gr = OscFileGrabber(progress_obj=self.progress_obj)

    def __add_cpio(self, pac):
        """Queue ``pac`` for a later cpio download from the API."""
        prpap = '%s/%s/%s/%s' % (pac.project, pac.repository, pac.repoarch, pac.repopackage)
        self.cpio.setdefault(prpap, {})[pac.repofilename] = pac

    def __download_cpio_archive(self, apiurl, project, repo, arch, package, **pkgs):
        """Fetch ``pkgs`` as one cpio archive and move every member into the cache.

        ``pkgs`` maps binary names to package objects.  If the server answers
        with HTTP 414 (query string too long) the request is split into two
        halves which are fetched recursively.

        Raises ``oscerr.APIError`` if the archive contains an ``.errors``
        member, if a requested file is still missing afterwards, or if even a
        single-package request yields HTTP 414.  Other ``HTTPError``s are
        re-raised unchanged.
        """
        if not pkgs:
            return
        query = ['binary=%s' % quote_plus(i) for i in pkgs]
        query.append('view=cpio')
        for module in self.modules:
            query.append('module=' + module)
        try:
            url = makeurl(apiurl, ['build', project, repo, arch, package], query=query)
            sys.stdout.write("preparing download ...\r")
            sys.stdout.flush()
            with tempfile.NamedTemporaryFile(prefix='osc_build_cpio') as tmparchive:
                self.gr.urlgrab(url, filename=tmparchive.name,
                                text='fetching packages for \'%s\'' % project)
                archive = cpio.CpioRead(tmparchive.name)
                archive.read()
                for hdr in archive:
                    # XXX: we won't have an .errors file because we're using
                    # getbinarylist instead of the public/... route
                    # (which is routed to getbinaries)
                    # getbinaries does not support kiwi builds
                    if hdr.filename == b'.errors':
                        archive.copyin_file(hdr.filename)
                        raise oscerr.APIError('CPIO archive is incomplete '
                                              '(see .errors file)')
                    if package == '_repository':
                        # map the archive member name back to the key used in pkgs
                        n = re.sub(br'\.pkg\.tar\.(zst|.z)$', b'.arch', hdr.filename)
                        if n.startswith(b'container:'):
                            n = re.sub(br'\.tar\.(zst|.z)$', b'.tar', hdr.filename)
                            pac = pkgs[decode_it(n.rsplit(b'.', 1)[0])]
                            pac.canonname = hdr.filename
                        else:
                            pac = pkgs[decode_it(n.rsplit(b'.', 1)[0])]
                    else:
                        # this is a kiwi product
                        pac = pkgs[decode_it(hdr.filename)]

                    # Extract a single file from the cpio archive
                    fd = None
                    tmpfile = None
                    try:
                        fd, tmpfile = tempfile.mkstemp(prefix='osc_build_file')
                        archive.copyin_file(hdr.filename,
                                            decode_it(os.path.dirname(tmpfile)),
                                            decode_it(os.path.basename(tmpfile)))
                        self.move_package(tmpfile, pac.localdir, pac)
                    finally:
                        if fd is not None:
                            os.close(fd)
                        # move_package normally renames the file away; only a
                        # failed extraction/move leaves it behind
                        if tmpfile is not None and os.path.exists(tmpfile):
                            os.unlink(tmpfile)

                for pac in pkgs.values():
                    if not os.path.isfile(pac.fullfilename):
                        raise oscerr.APIError('failed to fetch file \'%s\': '
                                              'missing in CPIO archive' %
                                              pac.repofilename)
        except HTTPError as e:
            if e.code != 414:
                raise
            # query str was too large
            keys = list(pkgs.keys())
            if len(keys) == 1:
                raise oscerr.APIError('unable to fetch cpio archive: '
                                      'server always returns code 414')
            half = len(keys) // 2
            for chunk in (keys[:half], keys[half:]):
                new_pkgs = {k: pkgs[k] for k in chunk}
                self.__download_cpio_archive(apiurl, project, repo, arch,
                                             package, **new_pkgs)

    def __fetch_cpio(self, apiurl):
        """Download everything that was queued via ``__add_cpio``."""
        for prpap, pkgs in self.cpio.items():
            project, repo, arch, package = prpap.split('/', 3)
            self.__download_cpio_archive(apiurl, project, repo, arch, package, **pkgs)

    def fetch(self, pac, prefix=''):
        """Download ``pac`` from its URL list and move it into the cache.

        ``prefix`` is prepended to the progress text.  If no URL works, the
        package is queued for a cpio download when ``enable_cpio`` is set;
        otherwise an error is printed and the process exits with status 1.
        """
        # for use by the failure callback
        self.curpac = pac

        mg = OscMirrorGroup(self.gr, pac.urllist)

        if self.http_debug:
            print('\nURLs to try for package \'%s\':' % pac, file=sys.stderr)
            print('\n'.join(pac.urllist), file=sys.stderr)
            print(file=sys.stderr)

        # Create the temporary file before entering the try block so that the
        # cleanup in ``finally`` never refers to an unbound name.
        tmpfile = tempfile.NamedTemporaryFile(prefix='osc_build', delete=False)
        try:
            with tmpfile:
                mg_stat = mg.urlgrab(pac.filename, filename=tmpfile.name,
                                     text='%s(%s) %s' % (prefix, pac.project, pac.filename))
                if mg_stat:
                    self.move_package(tmpfile.name, pac.localdir, pac)

            if not mg_stat:
                if self.enable_cpio:
                    print('%s/%s: attempting download from api, since not found'
                          % (pac.project, pac.name))
                    self.__add_cpio(pac)
                    return
                print()
                print('Error: Failed to retrieve %s from the following locations '
                      '(in order):' % pac.filename, file=sys.stderr)
                print('\n'.join(pac.urllist), file=sys.stderr)
                sys.exit(1)
        finally:
            # delete=False: the file is ours to remove unless it was moved away
            if os.path.exists(tmpfile.name):
                os.unlink(tmpfile.name)

    def move_package(self, tmpfile, destdir, pac_obj=None):
        """Move the downloaded ``tmpfile`` into ``destdir`` under its canonical name.

        The canonical name is taken from ``pac_obj.canonname`` for containers,
        otherwise from querying the file itself, and finally from
        ``pac_obj.binary``.  When ``pac_obj`` is given, its ``canonname`` and
        ``fullfilename`` attributes are updated.

        Exits with status 1 if the file type is unsupported and no ``pac_obj``
        was given.  Raises ``oscerr.OscIOError`` if the canonical name
        contains a slash.
        """
        canonname = None
        if pac_obj and pac_obj.name.startswith('container:'):
            canonname = pac_obj.canonname
        if canonname is None:
            pkgq = packagequery.PackageQuery.query(tmpfile, extra_rpmtags=(1044, 1051, 1052))
            if pkgq:
                canonname = pkgq.canonname()
            else:
                if pac_obj is None:
                    print('Unsupported file type: ', tmpfile, file=sys.stderr)
                    sys.exit(1)
                canonname = pac_obj.binary
        decoded_canonname = decode_it(canonname)
        # canonname may be bytes or str (run() builds a str for containers);
        # testing b'/' against a str would raise TypeError, so only do the
        # bytes check on bytes.
        has_slash = isinstance(canonname, bytes) and b'/' in canonname
        if has_slash or '/' in decoded_canonname:
            raise oscerr.OscIOError(None, 'canonname contains a slash')

        fullfilename = os.path.join(destdir, decoded_canonname)
        if pac_obj is not None:
            pac_obj.canonname = canonname
            pac_obj.fullfilename = fullfilename
        shutil.move(tmpfile, fullfilename)
        os.chmod(fullfilename, 0o644)

    def dirSetup(self, pac):
        """Create the cache directory for ``pac``; exit with status 1 on failure."""
        target = os.path.join(self.cachedir, pac.localdir)
        if not os.path.exists(target):
            try:
                os.makedirs(target, mode=0o755)
            except OSError as e:
                print('packagecachedir is not writable for you?', file=sys.stderr)
                print(e, file=sys.stderr)
                sys.exit(1)

    def _build_urllist(self, buildinfo, pac):
        """Return the download URL list to use for ``pac``.

        An empty list is returned when ``download_api_only`` is set.  A URL
        registered in ``buildinfo.urls`` for the package's
        ``project/repository`` replaces the generic URL list.
        """
        if self.download_api_only:
            return []
        urllist = self.urllist
        key = '%s/%s' % (pac.project, pac.repository)
        project_repo_url = buildinfo.urls.get(key)
        if project_repo_url is not None:
            urllist = [project_repo_url]
        return urllist

    def run(self, buildinfo):
        """Make sure all dependencies and project keys of ``buildinfo`` are cached.

        Steps:
          1. compute URLs for every dependency and drop cached files whose
             checksum does not match ``hdrmd5``;
          2. download every missing dependency (raises ``oscerr.OscIOError``
             in offline mode);
          3. fetch the queued cpio archives from the API;
          4. fetch the ``_pubkey`` of every project, falling back to the
             parent project, and record the key files in ``buildinfo.keys``
             and the project names in ``buildinfo.prjkeys``.
        """
        cached = 0
        total = len(buildinfo.deps)
        for i in buildinfo.deps:
            urllist = self._build_urllist(buildinfo, i)
            i.makeurls(self.cachedir, urllist)
            # find container extension by looking in the cache
            if i.name.startswith('container:') and i.fullfilename.endswith('.tar.xz'):
                for ext in ['.tar.xz', '.tar.gz', '.tar']:
                    if os.path.exists(i.fullfilename[:-7] + ext):
                        i.canonname = i.canonname[:-7] + ext
                        i.makeurls(self.cachedir, urllist)

            if os.path.exists(i.fullfilename):
                cached += 1
                # only containers and rpms are checksum-validated here
                if not i.name.startswith('container:') and i.pacsuffix != 'rpm':
                    continue
                if i.hdrmd5:
                    if i.name.startswith('container:'):
                        hdrmd5 = dgst(i.fullfilename)
                    else:
                        hdrmd5 = packagequery.PackageQuery.queryhdrmd5(i.fullfilename)
                    if not hdrmd5 or hdrmd5 != i.hdrmd5:
                        # stale cache entry: remove it so it is downloaded again
                        os.unlink(i.fullfilename)
                        cached -= 1

        miss = 0
        needed = total - cached
        if total:
            miss = 100.0 * needed / total
        print("%.1f%% cache miss. %d/%d dependencies cached.\n" % (miss, cached, total))
        done = 1
        for i in buildinfo.deps:
            if not os.path.exists(i.fullfilename):
                if self.offline:
                    raise oscerr.OscIOError(None,
                                            'Missing \'%s\' in cache: '
                                            '--offline not possible.' %
                                            i.fullfilename)
                self.dirSetup(i)
                try:
                    # if there isn't a progress bar, there is no output at all
                    prefix = ''
                    if not self.progress_obj:
                        print('%d/%d (%s) %s' % (done, needed, i.project, i.filename))
                    else:
                        prefix = '[%d/%d] ' % (done, needed)
                    self.fetch(i, prefix=prefix)

                    if not os.path.isfile(i.fullfilename):
                        # if the file wasn't downloaded and cannot be found on disk,
                        # mark it for downloading from the API
                        self.__add_cpio(i)
                    else:
                        # A checksum mismatch of the downloaded package is only
                        # reported; the file is kept.
                        #
                        # wbrown 2022 - is there a reason to keep these md5's at all? md5 is
                        # broken from a security POV so these aren't a trusted source for validation
                        # of the file content. They are often incorrect forcing download via the API
                        # which for anyone outside the EU is excruciating. And when they are ignored
                        # builds work and progress anyway? So what do they even do? What are they
                        # for? They should just be removed.
                        hdrmd5 = packagequery.PackageQuery.queryhdrmd5(i.fullfilename)
                        if not hdrmd5 or hdrmd5 != i.hdrmd5:
                            print('%s/%s: allowing invalid file, probably an OBS bug - hdrmd5 did not match - %s != %s'
                                % (i.project, i.name, hdrmd5, i.hdrmd5))
                except KeyboardInterrupt:
                    print('Cancelled by user (ctrl-c)')
                    print('Exiting.')
                    sys.exit(0)
                done += 1

        self.__fetch_cpio(buildinfo.apiurl)

        # NOTE: prjs grows while it is being iterated (parent projects are
        # appended below), which is why a list copy is used here.
        prjs = list(buildinfo.projects.keys())
        for i in prjs:
            dest = "%s/%s" % (self.cachedir, i)
            if not os.path.exists(dest):
                os.makedirs(dest, mode=0o755)
            dest += '/_pubkey'

            url = makeurl(buildinfo.apiurl, ['source', i, '_pubkey'])
            try_parent = False
            try:
                if self.offline and not os.path.exists(dest):
                    # may need to try parent
                    try_parent = True
                elif not self.offline:
                    OscFileGrabber().urlgrab(url, dest)
                # not that many keys usually
                if i not in buildinfo.prjkeys and not try_parent:
                    buildinfo.keys.append(dest)
                    buildinfo.prjkeys.append(i)
            except KeyboardInterrupt:
                print('Cancelled by user (ctrl-c)')
                print('Exiting.')
                if os.path.exists(dest):
                    os.unlink(dest)
                sys.exit(0)
            except HTTPError as e:
                # Not found is okay, let's go to the next project
                if e.code != 404:
                    print("Invalid answer from server", e, file=sys.stderr)
                    sys.exit(1)
                try_parent = True

            if try_parent:
                if self.http_debug:
                    print("can't fetch key for %s" % (i), file=sys.stderr)
                    print("url: %s" % url, file=sys.stderr)

                if os.path.exists(dest):
                    os.unlink(dest)

                parts = i.rsplit(':', 1)
                # try key from parent project
                if len(parts) > 1 and parts[1] and parts[0] not in buildinfo.projects:
                    prjs.append(parts[0])


def verify_pacs_old(pac_list):
    """Take a list of rpm filenames and run rpm -K on them.

       In case of failure, exit with status 1 after printing the offending
       line (and a hint on how to proceed) to stderr.  Returns None when the
       list is empty or every package verifies.

       Check all packages in one go, since this is much faster than calling
       'rpm -K' for each of them.
       """
    if not pac_list:
        return

    # The checks below match rpm's English output, so the locale is forced
    # for the child process only; os.environ itself is left untouched.
    env = dict(os.environ, LC_ALL='en_EN')

    # don't care about the return value because we check the
    # output anyway; stderr is merged into stdout so nothing is missed.
    # The output is decoded to text: comparing str markers against the raw
    # bytes would raise TypeError.
    result = subprocess.run(['rpm', '-K'] + pac_list, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, close_fds=True, env=env,
                            encoding='utf-8', errors='replace')

    for line in result.stdout.splitlines(keepends=True):

        if 'OK' not in line:
            print()
            print('The following package could not be verified:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)

        if 'NOT OK' in line:
            print()
            print('The following package could not be verified:', file=sys.stderr)
            print(line, file=sys.stderr)

            if 'MISSING KEYS' in line:
                # the key id is the text between the last '#' and the next ')'
                missing_key = line.split('#')[-1].split(')')[0]

                print("""
- If the key (%(name)s) is missing, install it first.
  For example, do the following:
    osc signkey PROJECT > file
  and, as root:
    rpm --import %(dir)s/keyfile-%(name)s

  Then, just start the build again.

- If you do not trust the packages, you should configure osc build for XEN or KVM

- You may use --no-verify to skip the verification (which is a risk for your system).
""" % {'name': missing_key,
                    'dir': os.path.expanduser('~')}, file=sys.stderr)

            else:
                print("""
- If the signature is wrong, you may try deleting the package manually
  and re-run this program, so it is fetched again.
""", file=sys.stderr)

            sys.exit(1)


def verify_pacs(bi):
    """Verify the signatures of all dependency packages of a buildinfo.

       Unless the 'builtin_signature_check' config option is exactly True,
       the work is delegated to verify_pacs_old().  Otherwise every package
       is checked against the keys listed in bi.keys.

       Raises oscerr.APIError if there are packages but no keys; exits
       with status 1 if any package fails verification.
       """

    filenames = [dep.fullfilename for dep in bi.deps]

    if conf.config['builtin_signature_check'] is not True:
        return verify_pacs_old(filenames)
    if not filenames:
        return
    if not bi.keys:
        raise oscerr.APIError("can't verify packages due to lack of GPG keys")

    print("using keys from", ', '.join(bi.prjkeys))

    checker = osc_checker.Checker()
    any_failed = False
    try:
        checker.readkeys(bi.keys)
        for filename in filenames:
            try:
                checker.check(filename)
            except Exception as err:
                # report the failure but keep checking the remaining packages
                any_failed = True
                print(filename, ':', err)
    finally:
        # the checker is cleaned up on every path: success, failure, or error
        checker.cleanup()

    if any_failed:
        sys.exit(1)

# vim: sw=4 et