repo_checker: Simplify cycle check

No longer compare against the target project's cycle, but just against
a configured list of package names. This way we're not bound to
refreezing stagings if we reduced cycles and it's clearer to the
operator what happens and how to react to it.
This commit is contained in:
Stephan Kulow 2019-02-11 09:02:29 +01:00
parent 9984223b01
commit 3cd79db206
4 changed files with 36 additions and 274 deletions

View File

@ -3,6 +3,7 @@ from xml.etree import cElementTree as ET
from osc.core import makeurl from osc.core import makeurl
from osc.core import http_GET from osc.core import http_GET
from osclib.core import fileinfo_ext_all from osclib.core import fileinfo_ext_all
from osclib.core import builddepinfo
try: try:
from urllib.error import HTTPError from urllib.error import HTTPError
@ -73,9 +74,7 @@ class CleanupRings(object):
def fill_pkgdeps(self, prj, repo, arch): def fill_pkgdeps(self, prj, repo, arch):
url = makeurl(self.api.apiurl, ['build', prj, repo, arch, '_builddepinfo']) root = builddepinfo(self.api.apiurl, prj, repo, arch)
f = http_GET(url)
root = ET.parse(f).getroot()
for package in root.findall('package'): for package in root.findall('package'):
# use main package name for multibuild. We can't just ignore # use main package name for multibuild. We can't just ignore

View File

@ -232,6 +232,13 @@ def fileinfo_ext(apiurl, project, repo, arch, package, filename):
{'view': 'fileinfo_ext'}) {'view': 'fileinfo_ext'})
return ET.parse(http_GET(url)).getroot() return ET.parse(http_GET(url)).getroot()
def builddepinfo(apiurl, project, repo, arch, order = False):
query = {}
if order:
query['view'] = 'order'
url = makeurl(apiurl, ['build', project, repo, arch, '_builddepinfo'], query)
return ETL.parse(http_GET(url)).getroot()
def entity_email(apiurl, key, entity_type='person', include_name=False): def entity_email(apiurl, key, entity_type='person', include_name=False):
url = makeurl(apiurl, [entity_type, key]) url = makeurl(apiurl, [entity_type, key])
root = ET.parse(http_GET(url)).getroot() root = ET.parse(http_GET(url)).getroot()

View File

@ -1,218 +0,0 @@
try:
from urllib.error import HTTPError
except ImportError:
# python 2.x
from urllib2 import HTTPError
from xml.etree import cElementTree as ET
from osc.core import http_GET
from osc.core import makeurl
from .memoize import memoize
class Graph(dict):
"""Graph object. Inspired in NetworkX data model."""
def __init__(self):
"""Initialize an empty graph."""
# The nodes are stored in the Graph dict itself, but the
# adjacent list is stored as an attribute.
self.adj = {}
def add_node(self, name, value):
"""Add a node in the graph."""
self[name] = value
if name not in self.adj:
self.adj[name] = set()
def add_nodes_from(self, nodes_and_values):
"""Add multiple nodes"""
for node, value in nodes_and_values:
self.add_node(node, value)
def add_edge(self, u, v, directed=True):
"""Add the edge u -> v, an v -> u if not directed."""
self.adj[u].add(v)
if not directed:
self.adj[v].add(u)
def add_edges_from(self, edges, directed=True):
"""Add the edges from an iterator."""
for u, v in edges:
self.add_edge(u, v, directed)
def remove_edge(self, u, v, directed=True):
"""Remove the edge u -> v, an v -> u if not directed."""
try:
self.adj[u].remove(v)
except KeyError:
pass
if not directed:
try:
self.adj[v].remove(u)
except KeyError:
pass
def remove_edges_from(self, edges, directed=True):
"""Remove the edges from an iterator."""
for u, v in edges:
self.remove_edge(u, v, directed)
def edges(self, v):
"""Get the adjancent list for a vertex."""
return sorted(self.adj[v]) if v in self else ()
def edges_to(self, v):
"""Get the all the vertex that point to v."""
return sorted(u for u in self.adj if v in self.adj[u])
def cycles(self):
"""Detect cycles using Tarjan algorithm."""
index = [0]
path = []
cycles = []
v_index = {}
v_lowlink = {}
def scc(node, v):
v_index[v], v_lowlink[v] = index[0], index[0]
index[0] += 1
path.append(node)
for succ in self.adj.get(node, []):
w = self[succ]
if w not in v_index:
scc(succ, w)
v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
elif succ in path:
v_lowlink[v] = min(v_lowlink[v], v_index[w])
if v_index[v] == v_lowlink[v]:
i = path.index(node)
path[:], cycle = path[:i], frozenset(path[i:])
if len(cycle) > 1:
cycles.append(cycle)
for node in sorted(self):
v = self[node]
if not getattr(v, 'index', 0):
scc(node, v)
return frozenset(cycles)
class Package(object):
"""Simple package container. Used in a graph as a vertex."""
def __init__(self, pkg=None, src=None, deps=None, subs=None,
element=None):
self.pkg = pkg
self.src = src
self.deps = deps
self.subs = subs
if element:
self.load(element)
def load(self, element):
"""Load a node from a ElementTree package XML element"""
self.pkg = element.attrib['name']
self.src = [e.text for e in element.findall('source')]
assert len(self.src) == 1, 'There are more that one source packages in the graph'
self.src = self.src[0]
self.deps = set(e.text for e in element.findall('pkgdep'))
self.subs = set(e.text for e in element.findall('subpkg'))
def __repr__(self):
return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg,
self.src,
self.deps,
self.subs)
class CycleDetector(object):
"""Class to detect cycles in an OBS project."""
def __init__(self, apiurl):
self.apiurl = apiurl
# Store packages prevoiusly ignored. Don't pollute the screen.
self._ignore_packages = set()
def _builddepinfo(self, project, repository, arch):
root = None
try:
# print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
url = makeurl(self.apiurl, ['build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
root = http_GET(url).read()
except HTTPError as e:
print('ERROR in URL %s [%s]' % (url, e))
return root
def _get_builddepinfo_graph(self, project, repository, arch):
"""Generate the buildepinfo graph for a given architecture."""
# Note, by default generate the graph for all Factory /
# 13/2. If you only need the base packages you can use:
# project = 'Base:System'
# repository = 'openSUSE_Factory'
root = ET.fromstring(self._builddepinfo(project, repository, arch))
# Reset the subpackages dict here, so for every graph is a
# different object.
packages = [Package(element=e) for e in root.findall('package')]
graph = Graph()
graph.add_nodes_from((p.pkg, p) for p in packages)
subpkgs = {} # Given a subpackage, recover the source package
for p in packages:
# Check for packages that provides the same subpackage
for subpkg in p.subs:
if subpkg in subpkgs:
# print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
pass
else:
subpkgs[subpkg] = p.pkg
for p in packages:
# Calculate the missing deps
deps = p.deps
missing = set(deps) - set(subpkgs)
if missing:
if p.pkg not in self._ignore_packages:
# print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
self._ignore_packages.add(p.pkg)
continue
graph.add_edges_from((p.pkg, subpkgs[d]) for d in deps)
# Store the subpkgs dict in the graph. It will be used later.
graph.subpkgs = subpkgs
return graph
def cycles(self, override_pair, overridden_pair, arch):
"""Detect cycles in a specific repository."""
# Detect cycles - We create the full graph from _builddepinfo.
project_graph = self._get_builddepinfo_graph(overridden_pair[0], overridden_pair[1], arch)
current_graph = self._get_builddepinfo_graph(override_pair[0], override_pair[1], arch)
# Sometimes, new cycles have only new edges, but not new
# packages. We need to inform about this, so this can become
# a warning instead of an error.
#
# To do that, we store in `project_cycles_pkgs` all the
# project (i.e Factory) cycles as a set of packages, so we can
# check if the new cycle (also as a set of packages) is
# included here.
project_cycles = project_graph.cycles()
project_cycles_pkgs = [set(cycle) for cycle in project_cycles]
for cycle in current_graph.cycles():
if cycle not in project_cycles:
project_edges = set((u, v) for u in cycle for v in project_graph.edges(u) if v in cycle)
current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
current_pkgs = set(cycle)
yield (cycle,
sorted(current_edges - project_edges),
not any(current_pkgs.issubset(cpkgs) for cpkgs in project_cycles_pkgs))

View File

@ -18,6 +18,7 @@ from osclib.cache_manager import CacheManager
from osclib.conf import Config from osclib.conf import Config
from osclib.conf import str2bool from osclib.conf import str2bool
from osclib.core import BINARY_REGEX from osclib.core import BINARY_REGEX
from osclib.core import builddepinfo
from osclib.core import depends_on from osclib.core import depends_on
from osclib.core import devel_project_fallback from osclib.core import devel_project_fallback
from osclib.core import fileinfo_ext_all from osclib.core import fileinfo_ext_all
@ -32,7 +33,6 @@ from osclib.core import repositories_states
from osclib.core import repository_arch_state from osclib.core import repository_arch_state
from osclib.core import repositories_published from osclib.core import repositories_published
from osclib.core import target_archs from osclib.core import target_archs
from osclib.cycle import CycleDetector
from osclib.memoize import memoize from osclib.memoize import memoize
from osclib.util import sha1_short from osclib.util import sha1_short
@ -55,7 +55,6 @@ class RepoChecker(ReviewBot.ReviewBot):
self.comment_handler = True self.comment_handler = True
# RepoChecker options. # RepoChecker options.
self.skip_cycle = False
self.force = False self.force = False
def project_only(self, project, post_comments=False): def project_only(self, project, post_comments=False):
@ -278,52 +277,27 @@ class RepoChecker(ReviewBot.ReviewBot):
if section: if section:
yield InstallSection(section, text) yield InstallSection(section, text)
@memoize(ttl=60, session=True) def cycle_check(self, project, repository, arch, cycle_packages):
def cycle_check_skip(self, project): self.logger.info('cycle check: start %s/%s/%s' % (project, repository, arch))
if self.skip_cycle:
return True
# Look for skip-cycle comment command.
comments = self.comment_api.get_comments(project_name=project)
users = self.request_override_check_users(project)
for _, who in self.comment_api.command_find(
comments, self.review_user, 'skip-cycle', users):
self.logger.debug('comment command: skip-cycle by {}'.format(who))
return True
return False
def cycle_check(self, override_pair, overridden_pair, arch):
if self.cycle_check_skip(override_pair[0]):
self.logger.info('cycle check: skip due to --skip-cycle or comment command')
return CheckResult(True, None)
self.logger.info('cycle check: start')
comment = [] comment = []
first = True
cycle_detector = CycleDetector(self.apiurl)
for index, (cycle, new_edges, new_packages) in enumerate(
cycle_detector.cycles(override_pair, overridden_pair, arch), start=1):
if not new_packages: allowed_cycles = []
continue if cycle_packages:
for comma_list in cycle_packages.split(';'):
allowed_cycles.append(comma_list.split(','))
if first: depinfo = builddepinfo(self.apiurl, project, repository, arch, order = False)
comment.append('### new [cycle(s)](/project/repository_state/{}/{})\n'.format( for cycle in depinfo.findall('cycle'):
override_pair[0], override_pair[1])) for package in cycle.findall('package'):
first = False package = package.text
allowed = False
# New package involved in cycle, build comment. for acycle in allowed_cycles:
comment.append('- #{}: {} package cycle, {} new edges'.format( if package in acycle:
index, len(cycle), len(new_edges))) allowed = True
break
comment.append(' - cycle') if not allowed:
for package in sorted(cycle): cycled = [p.text for p in cycle.findall('package')]
comment.append(' - {}'.format(package)) comment.append('Package {} appears in cycle {}'.format(package, '/'.join(cycled)))
comment.append(' - new edges')
for edge in sorted(new_edges):
comment.append(' - ({}, {})'.format(edge[0], edge[1]))
if len(comment): if len(comment):
# New cycles, post comment. # New cycles, post comment.
@ -375,7 +349,7 @@ class RepoChecker(ReviewBot.ReviewBot):
return None return None
@memoize(session=True) @memoize(session=True)
def repository_check(self, repository_pairs, state_hash, simulate_merge, whitelist=None, arch_whitelist=None, post_comments=False): def repository_check(self, repository_pairs, state_hash, simulate_merge, whitelist=None, arch_whitelist=None, post_comments=False, cycle_packages=None):
comment = [] comment = []
project, repository = repository_pairs[0] project, repository = repository_pairs[0]
self.logger.info('checking {}/{}@{}[{}]'.format( self.logger.info('checking {}/{}@{}[{}]'.format(
@ -438,7 +412,7 @@ class RepoChecker(ReviewBot.ReviewBot):
whitelist = self.binary_whitelist(repository_pairs[0], repository_pairs[1], arch) whitelist = self.binary_whitelist(repository_pairs[0], repository_pairs[1], arch)
results = { results = {
'cycle': self.cycle_check(repository_pairs[0], repository_pairs[1], arch), 'cycle': self.cycle_check(repository_pairs[0][0], repository_pairs[0][1], arch, cycle_packages),
'install': self.install_check( 'install': self.install_check(
repository_pairs[1], arch, directories, ignore, whitelist), repository_pairs[1], arch, directories, ignore, whitelist),
} }
@ -576,6 +550,7 @@ class RepoChecker(ReviewBot.ReviewBot):
config = Config.get(self.apiurl, action.tgt_project) config = Config.get(self.apiurl, action.tgt_project)
staging = config.get('staging') staging = config.get('staging')
arch_whitelist = config.get('repo_checker-arch-whitelist') arch_whitelist = config.get('repo_checker-arch-whitelist')
cycle_packages = config.get('repo_checker-allowed-in-cycles')
if staging: if staging:
api = self.staging_api(staging) api = self.staging_api(staging)
if not api.is_adi_project(repository_pairs[0][0]): if not api.is_adi_project(repository_pairs[0][0]):
@ -585,7 +560,10 @@ class RepoChecker(ReviewBot.ReviewBot):
whitelist = config.get('repo_checker-binary-whitelist-ring', '').split(' ') whitelist = config.get('repo_checker-binary-whitelist-ring', '').split(' ')
state_hash = self.repository_state(repository_pairs, True) state_hash = self.repository_state(repository_pairs, True)
if not self.repository_check(repository_pairs, state_hash, True, arch_whitelist=arch_whitelist, whitelist=whitelist): if not self.repository_check(repository_pairs, state_hash, True,
arch_whitelist=arch_whitelist,
whitelist=whitelist,
cycle_packages=cycle_packages):
return None return None
self.review_messages['accepted'] = 'cycle and install check passed' self.review_messages['accepted'] = 'cycle and install check passed'
@ -658,7 +636,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
def get_optparser(self): def get_optparser(self):
parser = ReviewBot.CommandLineInterface.get_optparser(self) parser = ReviewBot.CommandLineInterface.get_optparser(self)
parser.add_option('--skip-cycle', action='store_true', help='skip cycle check')
parser.add_option('--force', action='store_true', help='force review even if project is not ready') parser.add_option('--force', action='store_true', help='force review even if project is not ready')
return parser return parser
@ -666,9 +643,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
def setup_checker(self): def setup_checker(self):
bot = ReviewBot.CommandLineInterface.setup_checker(self) bot = ReviewBot.CommandLineInterface.setup_checker(self)
if self.options.skip_cycle:
bot.skip_cycle = self.options.skip_cycle
bot.force = self.options.force bot.force = self.options.force
return bot return bot
@ -678,6 +652,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
self.checker.check_requests() # Needed to properly init ReviewBot. self.checker.check_requests() # Needed to properly init ReviewBot.
self.checker.project_only(project, opts.post_comments) self.checker.project_only(project, opts.post_comments)
if __name__ == "__main__": if __name__ == '__main__':
app = CommandLineInterface() app = CommandLineInterface()
sys.exit(app.main()) sys.exit(app.main())