openSUSE-release-tools/repo_checker.py

#!/usr/bin/python

from __future__ import print_function

import cmdln
from collections import namedtuple
import hashlib
from lxml import etree as ET
import os
from osc.core import show_results_meta
import pipes
import re
import subprocess
import sys
import tempfile

from osclib.comments import CommentAPI
from osclib.core import binary_list
from osclib.core import depends_on
from osclib.core import devel_project_fallback
from osclib.core import fileinfo_ext_all
from osclib.core import package_binary_list
from osclib.core import request_staged
from osclib.core import target_archs
from osclib.cycle import CycleDetector
from osclib.memoize import CACHEDIR

import ReviewBot

# Directory holding this script; the helper scripts invoked below
# (bs_mirrorfull, repo_checker.pl) are looked up relative to it.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
# Outcome of one check: a success flag plus the comment text describing the
# failure (None is used for the comment when the check passed).
CheckResult = namedtuple('CheckResult', ('success', 'comment'))
# Matches the unindented header line of a problem section in the install check
# output: either "can't install <binary>:" or
# "found conflict of <binary> with <binary>:". Groups that do not take part in
# the match are None.
INSTALL_REGEX = r"^(?:can't install (.*?)|found conflict of (.*?) with (.*?)):$"
# One parsed problem section: the binaries named in its header line and the
# complete text of the section (header plus indented detail lines).
InstallSection = namedtuple('InstallSection', ('binaries', 'text'))

class RepoChecker(ReviewBot.ReviewBot):
    def __init__(self, *args, **kwargs):
        """Initialise the base bot, then apply the RepoChecker defaults."""
        ReviewBot.ReviewBot.__init__(self, *args, **kwargs)

        # Switches read by the ReviewBot base class; all of them are enabled.
        self.only_one_action = self.request_default_return = self.comment_handler = True

        # Checker specific settings. CommandLineInterface.setup_checker()
        # overrides them from the parsed command line options.
        self.skip_cycle = self.force = False
        self.limit_group = None

    def repository_published(self, project):
        """Return True if no result of project's standard repo is unpublished."""
        results_meta = show_results_meta(
            self.apiurl, project, multibuild=True, repository=['standard'])
        # Any <result> whose state differs from "published" means the
        # repository is not (yet) fully published.
        unpublished = ET.fromstringlist(results_meta).xpath('result[@state!="published"]')
        return len(unpublished) == 0

    def project_only(self, project, post_comments=False):
        # self.staging_config needed by target_archs().
        api = self.staging_api(project)

        if not self.force and not self.repository_published(project):
            self.logger.info('{}/standard not published'.format(project))
            return

        build = ET.fromstringlist(show_results_meta(
            self.apiurl, project, multibuild=True, repository=['standard'])).get('state')
        dashboard_content = api.dashboard_content_load('repo_checker')
        if not self.force and dashboard_content:
            build_previous = dashboard_content.splitlines()[0]
            if build == build_previous:
                self.logger.info('{} build unchanged'.format(project))
                return

        comment = [build]
        for arch in self.target_archs(project):
            directory_project = self.mirror(project, arch)

            parse = project if post_comments else False
            results = {
                'cycle': CheckResult(True, None),
                'install': self.install_check(project, [directory_project], arch, parse=parse),
            }

            if not all(result.success for _, result in results.items()):
                self.result_comment(arch, results, comment)

        text = '\n'.join(comment).strip()
        if not self.dryrun:
            api.dashboard_content_ensure('repo_checker', text, 'project_only run')
            self.whitelist_clean(project)
        else:
            print(text)

        if post_comments:
            self.package_comments(project)

    def package_comments(self, project):
        """Post one comment per package listed in self.package_results.

        Depending on the 'repo_checker-package-comment-devel' setting the
        comment is written to the package's devel project (packages without
        one are skipped with a warning) or to the package in project itself.
        """
        self.logger.info('{} package comments'.format(len(self.package_results)))

        for package, sections in self.package_results.items():
            # NOTE(review): bool() of any non-empty string is True, so a
            # textual 'False' value would still select the devel branch --
            # confirm how staging_config stores this option.
            comment_on_devel = bool(self.staging_config[project].get(
                'repo_checker-package-comment-devel', True))

            if not comment_on_devel:
                bot_name_suffix = None
                comment_project, comment_package = project, package
                message = 'This package has installation issues and may not be installable:'
            else:
                bot_name_suffix = project
                comment_project, comment_package = devel_project_fallback(
                    self.apiurl, project, package)
                if comment_project is None or comment_package is None:
                    self.logger.warning('unable to find devel project for {}'.format(package))
                    continue

                message = 'The version of this package in [`{project}`](/package/show/{project}/{package}) ' \
                    'has installation issues and may not be installable:'.format(
                        project=project, package=package)

            # Ordering by text places sections about the same binaries next to
            # each other.
            ordered = sorted(sections, key=lambda s: s.text)
            body = '\n'.join([entry.text for entry in ordered]).strip()
            message += '\n\n<pre>\n{}\n</pre>'.format(body)

            # The reference hash covers only the binary names and the section
            # count, so version or release changes alone do not alter it while
            # relevant changes still do.
            binaries = set().union(*[entry.binaries for entry in ordered])
            info = ';'.join(['::'.join(sorted(binaries)), str(len(ordered))])
            reference = hashlib.sha1(info).hexdigest()[:7]

            # Comment on the package in order to notify its maintainers.
            self.comment_write(state='seen', result=reference, bot_name_suffix=bot_name_suffix,
                               project=comment_project, package=comment_package, message=message)

    def prepare_review(self):
        """Reduce self.requests to the staged requests worth reviewing.

        All per-batch state is reset first. A request is kept only when it is
        staged, matches self.limit_group (if set), belongs to a group in a
        ready state (or one that failed without broken packages, i.e. openQA
        only), is listed among the selected requests reported by the staging
        API and the group's build hash differs from the one stored in the
        last bot comment. self.force bypasses the readiness and
        unchanged-build filters.

        Kept requests are recorded in self.groups and self.requests_map and
        self.requests is rebuilt so requests of one group are adjacent.
        """
        # Reset for request batch.
        self.requests_map = {}
        self.groups = {}
        self.groups_build = {}
        self.groups_skip_cycle = []

        # Manipulated in ensure_group().
        self.group = None
        self.mirrored = set()

        # Stores parsed install_check() results grouped by package.
        self.package_results = {}

        # Look for requests of interest and group by staging.
        skip_build = set()
        for request in self.requests:
            # Only interesting if request is staged.
            group = request_staged(request)
            if not group:
                self.logger.debug('{}: not staged'.format(request.reqid))
                continue

            if self.limit_group and group != self.limit_group:
                continue

            # Only interested if group has completed building.
            api = self.staging_api(request.actions[0].tgt_project)
            status = api.project_status(group, True)
            # Corrupted requests may reference non-existent projects and will
            # thus return a None status which should be considered not ready.
            if not status or str(status['overall_state']) not in ('testing', 'review', 'acceptable'):
                # Not in a "ready" state.
                openQA_only = False  # Not relevant so set to False.
                if status and str(status['overall_state']) == 'failed':
                    # Exception to the rule is openQA only in failed state.
                    openQA_only = True
                    for project in api.project_status_walk(status):
                        if len(project['broken_packages']):
                            # Broken packages so not just openQA.
                            openQA_only = False
                            break

                if not self.force and not openQA_only:
                    self.logger.debug('{}: {} not ready'.format(request.reqid, group))
                    continue

            # Only interested if request is in consistent state, meaning it is
            # among the requests the staging API reports as selected.
            selected = api.project_status_requests('selected')
            if request.reqid not in selected:
                self.logger.debug('{}: inconsistent state'.format(request.reqid))
                # Skip the request like every other filter in this loop does;
                # previously the condition was logged but the request was
                # queued for review anyway.
                continue

            if group not in self.groups_build:
                # Generate build hash based on hashes from relevant projects.
                builds = []
                for staging in api.staging_walk(group):
                    builds.append(ET.fromstringlist(show_results_meta(
                        self.apiurl, staging, multibuild=True, repository=['standard'])).get('state'))
                builds.append(ET.fromstringlist(show_results_meta(
                    self.apiurl, api.project, multibuild=True, repository=['standard'])).get('state'))

                # Include meta revision for config changes (like whitelist).
                builds.append(str(api.get_prj_meta_revision(group)))
                self.groups_build[group] = hashlib.sha1(''.join(builds)).hexdigest()[:7]

                # Determine if build has changed since last comment.
                comments = self.comment_api.get_comments(project_name=group)
                _, info = self.comment_api.comment_find(comments, self.bot_name)
                if info and self.groups_build[group] == info.get('build'):
                    skip_build.add(group)

                # Look for skip-cycle comment command.
                users = self.request_override_check_users(request.actions[0].tgt_project)
                for _, who in self.comment_api.command_find(
                        comments, self.review_user, 'skip-cycle', users):
                    self.logger.debug('comment command: skip-cycle by {}'.format(who))
                    self.groups_skip_cycle.append(group)
                    break

            if not self.force and group in skip_build:
                self.logger.debug('{}: {} build unchanged'.format(request.reqid, group))
                continue

            self.requests_map[int(request.reqid)] = group
            self.groups.setdefault(group, []).append(request)

            self.logger.debug('{}: {} ready'.format(request.reqid, group))

        # Filter out undesirable requests and ensure requests are ordered
        # together with group for efficiency (ensure_group() only remembers
        # the most recently processed group).
        count_before = len(self.requests)
        self.requests = []
        for group in sorted(self.groups):
            self.requests.extend(self.groups[group])

        self.logger.debug('requests: {} skipped, {} queued'.format(
            count_before - len(self.requests), len(self.requests)))

    def ensure_group(self, request, action):
        """Check the staging group of request and return whether it passed.

        The cycle and install checks run once per group: consecutive requests
        of the group processed last reuse the stored self.group_pass. A
        failure comment (or a "resolved" comment replacing an earlier one) is
        written to the group project.
        """
        project = action.tgt_project
        group = self.requests_map[int(request.reqid)]

        if group == self.group:
            # Same group as the previous request: it was already processed.
            return self.group_pass

        self.logger.info('group {}'.format(group))
        self.group = group
        self.group_pass = True

        lines = []
        for arch in self.target_archs(project):
            stagings, directories, ignore = [], [], set()

            for staging in self.staging_api(project).staging_walk(group):
                if arch in self.target_archs(staging):
                    stagings.append(staging)
                    directories.append(self.mirror(staging, arch))
                    ignore.update(self.ignore_from_staging(project, staging, arch))
                else:
                    self.logger.debug('{}/{} not available'.format(staging, arch))

            if not stagings:
                # No staging provides this arch, nothing to check.
                continue

            # The target project is mirrored only once a staging matched the
            # arch, but its directory has to come first in the list.
            directories.insert(0, self.mirror(project, arch))

            whitelist = self.binary_whitelist(project, arch, group)

            # Run both checks against the group.
            results = {
                'cycle': self.cycle_check(project, stagings, arch),
                'install': self.install_check(project, directories, arch, ignore, whitelist),
            }

            if any(not result.success for result in results.values()):
                # A check failed: remember it and extend the comment.
                self.group_pass = False
                self.result_comment(arch, results, lines)

        info_extra = {'build': self.groups_build[group]}
        if self.group_pass:
            # A passed comment is only posted to replace a previous failure.
            self.comment_write(state='done', result='passed', project=group,
                               message='Previously reported problems have been resolved.',
                               identical=True, only_replace=True,
                               info_extra=info_extra)
        else:
            # Avoid identical comments that differ only in the build hash
            # while the target project is still building; once it is
            # published the comment is updated regardless.
            published = self.repository_published(project)
            self.comment_write(state='seen', result='failed', project=group,
                               message='\n'.join(lines).strip(), identical=True,
                               info_extra=info_extra, info_extra_identical=published)

        return self.group_pass

    def target_archs(self, project):
        """Return the archs of project, limited by the arch whitelist if set.

        The whitelist is read from the config of the product, i.e. the part of
        the project name in front of ':Staging:'.
        """
        archs = target_archs(self.apiurl, project)

        product, _, _ = project.partition(':Staging:')
        whitelist = self.staging_config[product].get('repo_checker-arch-whitelist')
        if whitelist:
            # Keep only the archs that are also whitelisted.
            archs = set(archs) & set(whitelist.split(' '))

        # Reverse alphabetical order; per the original author's note this is a
        # trick to prioritize x86_64.
        return sorted(archs, reverse=True)

    def mirror(self, project, arch):
        """Mirror project/standard/arch with bs_mirrorfull and return the directory.

        A (project, arch) pair already present in self.mirrored is not
        mirrored again. Raises Exception when the script exits non-zero.
        """
        key = (project, arch)
        directory = os.path.join(CACHEDIR, project, 'standard', arch)
        if key in self.mirrored:
            # Already mirrored during this request batch.
            return directory

        if not os.path.exists(directory):
            os.makedirs(directory)

        path = '/'.join((project, 'standard', arch))
        command = ' '.join(pipes.quote(part) for part in (
            'LC_ALL=C',
            'perl',
            os.path.join(SCRIPT_PATH, 'bs_mirrorfull'),
            '--nodebug',
            '{}/public/build/{}'.format(self.apiurl, path),
            directory,
        ))

        self.logger.info('mirroring {}'.format(path))
        if os.system(command) != 0:
            raise Exception('failed to mirror {}'.format(path))

        self.mirrored.add(key)
        return directory

    def ignore_from_staging(self, project, staging, arch):
        """Yield names of target project binaries to ignore in favor of staging.

        A binary of project is yielded when its package is among the packages
        found in the binary map of staging.
        """
        _, staging_binary_map = package_binary_list(self.apiurl, staging, 'standard', arch)
        staged_packages = frozenset(staging_binary_map.values())

        project_binaries, _ = package_binary_list(self.apiurl, project, 'standard', arch)
        for binary in project_binaries:
            if binary.package not in staged_packages:
                continue
            yield binary.name

    def binary_whitelist(self, project, arch, group):
        additions = self.staging_api(project).get_prj_pseudometa(group).get('config', {})
        prefix = 'repo_checker-binary-whitelist'
        whitelist = set()
        for key in [prefix, '-'.join([prefix, arch])]:
            whitelist.update(self.staging_config[project].get(key, '').split(' '))
            whitelist.update(additions.get(key, '').split(' '))
        whitelist = filter(None, whitelist)
        return whitelist

    def install_check(self, project, directories, arch, ignore=None, whitelist=None, parse=False):
        """Run repo_checker.pl on mirrored directories and report the result.

        project is only used to build the link in the comment header.
        directories lists the mirrored directories to check; when more than
        one is given the first is passed separately via -r and the rest form
        the list to check. The list passed in is left unmodified.
        ignore is an iterable of binary names written one per line to the
        ignore file (-f); whitelist is an iterable of binary names passed
        comma separated via -w. Both default to empty.
        When parse is truthy it is the project name under which parsed
        problem sections are grouped into self.package_results.

        Returns CheckResult(True, None) when the script exits with 0,
        otherwise CheckResult(False, <markdown comment>). Exit code 126 also
        resets self.mirrored so everything is mirrored again.
        """
        self.logger.info('install check: start')

        # None defaults avoid sharing one mutable default between calls.
        ignore = [] if ignore is None else ignore
        whitelist = [] if whitelist is None else whitelist

        # Split off the leading directory on a copy so the caller's list is
        # not modified as a side effect.
        directories = list(directories)
        directory_project = directories.pop(0) if len(directories) > 1 else None

        with tempfile.NamedTemporaryFile() as ignore_file:
            # Print ignored rpms on separate lines in ignore file.
            for item in ignore:
                ignore_file.write(item + '\n')
            ignore_file.flush()

            # Invoke repo_checker.pl to perform an install check.
            script = os.path.join(SCRIPT_PATH, 'repo_checker.pl')
            parts = ['LC_ALL=C', 'perl', script, arch, ','.join(directories),
                     '-f', ignore_file.name, '-w', ','.join(whitelist)]
            if directory_project:
                parts.extend(['-r', directory_project])

            parts = [pipes.quote(part) for part in parts]
            # communicate() has to finish inside the with block: the ignore
            # file is removed as soon as the block is left.
            p = subprocess.Popen(' '.join(parts), shell=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, close_fds=True)
            stdout, stderr = p.communicate()

        if p.returncode:
            self.logger.info('install check: failed')
            if p.returncode == 126:
                self.logger.warning('mirror cache reset due to corruption')
                self.mirrored = set()
            elif parse:
                # Parse output for later consumption for posting comments.
                sections = self.install_check_parse(stdout)
                self.install_check_sections_group(parse, arch, sections)

            # Format output as markdown comment.
            parts = []

            stdout = stdout.strip()
            if stdout:
                parts.append('<pre>\n' + stdout + '\n' + '</pre>\n')
            stderr = stderr.strip()
            if stderr:
                parts.append('<pre>\n' + stderr + '\n' + '</pre>\n')

            header = '### [install check & file conflicts](/package/view_file/{}:Staging/dashboard/repo_checker)\n\n'.format(project)
            return CheckResult(False, header + ('\n' + ('-' * 80) + '\n\n').join(parts))

        self.logger.info('install check: passed')
        return CheckResult(True, None)

    def install_check_sections_group(self, project, arch, sections):
        """File each parsed section under every package owning one of its binaries.

        Sections are appended to the per-package lists in self.package_results.
        """
        _, binary_map = package_binary_list(self.apiurl, project, 'standard', arch)

        for section in sections:
            # If this ever switches to creating bugs it likely makes sense to
            # join the packages into a grouping key and share bugs for
            # conflicts.
            # Binaries missing from binary_map are skipped; this guard was
            # added after encountering:
            # https://lists.opensuse.org/opensuse-buildservice/2017-08/msg00035.html
            # Under normal circumstances this should never occur.
            packages = {binary_map[b] for b in section.binaries if b in binary_map}
            for package in packages:
                self.package_results.setdefault(package, []).append(section)

    def install_check_parse(self, output):
        """Yield an InstallSection for each problem section found in output.

        A section starts at an unindented line matching INSTALL_REGEX and
        covers the space-indented lines following it. Any other unindented
        line ends the current section without starting a new one.
        """
        binaries = None
        chunk = []

        for line in output.splitlines(True):
            if line.startswith(' '):
                # Indented lines belong to the section in progress, if any.
                if binaries:
                    chunk.append(line)
                continue

            # An unindented line always closes the section in progress.
            if binaries:
                yield InstallSection(binaries, ''.join(chunk))

            header = re.match(INSTALL_REGEX, line)
            if header is None:
                binaries = None
                continue

            # The regex covers two alternatives, so the groups of the
            # alternative that did not match are None and get dropped.
            binaries = [name for name in header.groups() if name is not None]
            chunk = [line]

        if binaries:
            yield InstallSection(binaries, ''.join(chunk))

    def cycle_check(self, project, stagings, arch):
        """Report cycles in the given stagings that involve new packages.

        Returns a passing CheckResult when the check is skipped (self.skip_cycle
        or a skip-cycle command recorded for the current group) or when no
        such cycle exists, otherwise a failing one with a markdown list.
        """
        if self.skip_cycle or self.group in self.groups_skip_cycle:
            self.logger.info('cycle check: skip due to --skip-cycle or comment command')
            return CheckResult(True, None)

        self.logger.info('cycle check: start')
        cycle_detector = CycleDetector(self.staging_api(project))
        comment = []
        for staging in stagings:
            heading_pending = True
            cycles = cycle_detector.cycles(staging, arch=arch)
            for index, (cycle, new_edges, new_packages) in enumerate(cycles, start=1):
                if not new_packages:
                    # Cycles without new packages are not reported.
                    continue

                if heading_pending:
                    # One heading per staging, placed before its first cycle.
                    comment.append('### new [cycle(s)](/project/repository_state/{}/standard)\n'.format(staging))
                    heading_pending = False

                comment.append('- #{}: {} package cycle, {} new edges'.format(
                    index, len(cycle), len(new_edges)))

                comment.append('   - cycle')
                comment.extend('      - {}'.format(package) for package in sorted(cycle))

                comment.append('   - new edges')
                comment.extend(
                    '      - ({}, {})'.format(edge[0], edge[1]) for edge in sorted(new_edges))

        if not comment:
            self.logger.info('cycle check: passed')
            return CheckResult(True, None)

        # New cycles were found, hand back the comment.
        self.logger.info('cycle check: failed')
        return CheckResult(False, '\n'.join(comment) + '\n')

    def result_comment(self, arch, results, comment):
        """Generate comment from results"""
        comment.append('## ' + arch + '\n')
        for result in results.values():
            if not result.success:
                comment.append(result.comment)

    def check_action_submit(self, request, action):
        if not self.ensure_group(request, action):
            return None

        self.review_messages['accepted'] = 'cycle and install check passed'
        return True

    def check_action_delete(self, request, action):
        """Review a delete request for remaining build and runtime dependents.

        Warnings are logged for packages that still build against the package
        and for binaries that require something it provides (looked up for
        x86_64 only). Returns None when the comment handler holds any lines or
        when the staging group does not pass, True otherwise.
        """
        # TODO Ignore tgt_project packages that depend on this that are part of
        # ignore list as and instead look at output from staging for those.

        built_binaries = set()
        revdeps = set()
        fileinfos = fileinfo_ext_all(
            self.apiurl, action.tgt_project, 'standard', 'x86_64', action.tgt_package)
        for fileinfo in fileinfos:
            built_binaries.add(fileinfo.find('name').text)
            revdeps.update(
                requiredby.get('name')
                for requiredby in fileinfo.findall('provides_ext/requiredby[@name]'))
        # Requirements between the package's own binaries do not count.
        runtime_deps = sorted(revdeps.difference(built_binaries))

        what_depends_on = depends_on(
            self.apiurl, action.tgt_project, 'standard', [action.tgt_package], True)

        # Drop the dependency of the package on itself (happens with eg java
        # bootstrapping itself with the previous build).
        if action.tgt_package in what_depends_on:
            what_depends_on.remove(action.tgt_package)

        if len(what_depends_on) > 0:
            self.logger.warn('{} is still a build requirement of:\n\n- {}'.format(
                action.tgt_package, '\n- '.join(sorted(what_depends_on))))

        if runtime_deps:
            self.logger.warn('{} provides runtime dependencies to:\n\n- {}'.format(
                action.tgt_package, '\n- '.join(runtime_deps)))

        # Presumably the comment handler collected the warnings logged above;
        # any line present turns into a failed comment.
        if len(self.comment_handler.lines) > 0:
            self.comment_write(state='seen', result='failed')
            return None

        # The delete could be rejected above before the group is checked.
        if self.ensure_group(request, action):
            self.review_messages['accepted'] = 'delete request is safe'
            return True

        return None

    def whitelist_clean(self, project, interactive=False):
        from copy import copy
        from difflib import unified_diff
        from osclib.core import BINARY_REGEX

        api = self.staging_api(project)

        # Determine which binaries are mentioned in repo_checker output.
        content = api.dashboard_content_load('repo_checker')
        sections = self.install_check_parse(content)
        binaries = set()
        for section in sections:
            for binary in section.binaries:
                match = re.match(BINARY_REGEX, binary)
                if match:
                    binaries.add(match.group('name'))

        # Find whitelist config entries and filter list to only those mentioned.
        prefix = 'repo_checker-binary-whitelist'
        binaries_common = None
        whitelists = {}
        whitelists_remaining = {}
        for key, value in self.staging_config[project].items():
            if not key.startswith(prefix):
                continue

            whitelists[key] = set(value.split())
            whitelists_remaining[key] = whitelists[key].intersection(binaries)

            if key != prefix:
                if binaries_common is None:
                    binaries_common = whitelists_remaining[key]
                else:
                    binaries_common = binaries_common.intersection(whitelists_remaining[key])

        if len(binaries_common):
            # Remove entries common to all archs and place in common whitelist.
            whitelists_remaining[prefix].update(binaries_common)

            for key, value in whitelists_remaining.items():
                if key == prefix:
                    continue

                whitelists_remaining[key] = whitelists_remaining[key] - binaries_common

        # Update whitelist entries with new values.
        config = api.dashboard_content_load('config').splitlines(True)
        config_new = copy(config)
        for key, value in whitelists_remaining.items():
            if value != whitelists[key]:
                self.whitelist_clean_set(config_new, key, ' '.join(value))

        if config == config_new:
            print('No changes')
            return

        # Present diff and prompt to apply.
        print(''.join(unified_diff(config, config_new, fromfile='config.orig', tofile='config.new')))
        print('Apply config changes? [y/n] (y): ',  end='')
        if interactive:
            response = raw_input().lower()
            if response != '' and response != 'y':
                print('quit')
                return
        else:
            print('y')

        api.dashboard_content_save('config', ''.join(config_new), 'repo_checker whitelist clean')

    def whitelist_clean_set(self, config, key, value):
        # Unfortunately even OscConfigParser does not preserve empty lines.
        for i, line in enumerate(config):
            if line.startswith(key + ' ='):
                config[i] = '{} = {}\n'.format(key, value) if value else '{} =\n'.format(key)

        return config


class CommandLineInterface(ReviewBot.CommandLineInterface):
    # Command line front end for RepoChecker. Documentation is kept in
    # comments on purpose: no docstrings are added to the class or its do_*
    # commands so that nothing derived from them can change.

    def __init__(self, *args, **kwargs):
        # NOTE(review): args and kwargs are handed over as two positional
        # values instead of being unpacked; kept as-is -- confirm against
        # ReviewBot.CommandLineInterface.__init__.
        ReviewBot.CommandLineInterface.__init__(self, args, kwargs)
        self.clazz = RepoChecker

    def get_optparser(self):
        # Extend the base option parser with the RepoChecker options.
        parser = ReviewBot.CommandLineInterface.get_optparser(self)

        extra_options = (
            ('--skip-cycle', dict(action='store_true', help='skip cycle check')),
            ('--force', dict(action='store_true', help='force review even if project is not ready')),
            ('--limit-group', dict(metavar='GROUP', help='only review requests in specific group')),
        )
        for flag, settings in extra_options:
            parser.add_option(flag, **settings)

        return parser

    def setup_checker(self):
        # Copy the parsed options onto the bot created by the base class.
        bot = ReviewBot.CommandLineInterface.setup_checker(self)
        options = self.options

        # skip_cycle is only overridden when the flag was given.
        if options.skip_cycle:
            bot.skip_cycle = options.skip_cycle

        bot.force = options.force
        bot.limit_group = options.limit_group

        return bot

    @cmdln.option('--post-comments', action='store_true', help='post comments to packages with issues')
    def do_project_only(self, subcmd, opts, project):
        # check_requests() is needed to properly init ReviewBot.
        self.checker.check_requests()
        self.checker.project_only(project, opts.post_comments)

    def do_whitelist_clean(self, subcmd, opts, project):
        # check_requests() is needed to properly init ReviewBot.
        self.checker.check_requests()
        self.checker.whitelist_clean(project, interactive=True)

if __name__ == "__main__":
    # Run the command line interface and use its result as the exit status.
    sys.exit(CommandLineInterface().main())