Merge pull request #1860 from coolo/simplify_cycle_detection

repo_checker: Simplify cycle check
This commit is contained in:
Stephan Kulow 2019-02-13 07:38:54 +01:00 committed by GitHub
commit fe23e8d1a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 274 deletions

View File

@ -3,6 +3,7 @@ from xml.etree import cElementTree as ET
from osc.core import makeurl from osc.core import makeurl
from osc.core import http_GET from osc.core import http_GET
from osclib.core import fileinfo_ext_all from osclib.core import fileinfo_ext_all
from osclib.core import builddepinfo
try: try:
from urllib.error import HTTPError from urllib.error import HTTPError
@ -73,9 +74,7 @@ class CleanupRings(object):
def fill_pkgdeps(self, prj, repo, arch): def fill_pkgdeps(self, prj, repo, arch):
url = makeurl(self.api.apiurl, ['build', prj, repo, arch, '_builddepinfo']) root = builddepinfo(self.api.apiurl, prj, repo, arch)
f = http_GET(url)
root = ET.parse(f).getroot()
for package in root.findall('package'): for package in root.findall('package'):
# use main package name for multibuild. We can't just ignore # use main package name for multibuild. We can't just ignore

View File

@ -232,6 +232,13 @@ def fileinfo_ext(apiurl, project, repo, arch, package, filename):
{'view': 'fileinfo_ext'}) {'view': 'fileinfo_ext'})
return ET.parse(http_GET(url)).getroot() return ET.parse(http_GET(url)).getroot()
def builddepinfo(apiurl, project, repo, arch, order = False):
query = {}
if order:
query['view'] = 'order'
url = makeurl(apiurl, ['build', project, repo, arch, '_builddepinfo'], query)
return ETL.parse(http_GET(url)).getroot()
def entity_email(apiurl, key, entity_type='person', include_name=False): def entity_email(apiurl, key, entity_type='person', include_name=False):
url = makeurl(apiurl, [entity_type, key]) url = makeurl(apiurl, [entity_type, key])
root = ET.parse(http_GET(url)).getroot() root = ET.parse(http_GET(url)).getroot()

View File

@ -1,218 +0,0 @@
try:
from urllib.error import HTTPError
except ImportError:
# python 2.x
from urllib2 import HTTPError
from xml.etree import cElementTree as ET
from osc.core import http_GET
from osc.core import makeurl
from .memoize import memoize
class Graph(dict):
"""Graph object. Inspired in NetworkX data model."""
def __init__(self):
"""Initialize an empty graph."""
# The nodes are stored in the Graph dict itself, but the
# adjacent list is stored as an attribute.
self.adj = {}
def add_node(self, name, value):
"""Add a node in the graph."""
self[name] = value
if name not in self.adj:
self.adj[name] = set()
def add_nodes_from(self, nodes_and_values):
"""Add multiple nodes"""
for node, value in nodes_and_values:
self.add_node(node, value)
def add_edge(self, u, v, directed=True):
"""Add the edge u -> v, an v -> u if not directed."""
self.adj[u].add(v)
if not directed:
self.adj[v].add(u)
def add_edges_from(self, edges, directed=True):
"""Add the edges from an iterator."""
for u, v in edges:
self.add_edge(u, v, directed)
def remove_edge(self, u, v, directed=True):
"""Remove the edge u -> v, an v -> u if not directed."""
try:
self.adj[u].remove(v)
except KeyError:
pass
if not directed:
try:
self.adj[v].remove(u)
except KeyError:
pass
def remove_edges_from(self, edges, directed=True):
"""Remove the edges from an iterator."""
for u, v in edges:
self.remove_edge(u, v, directed)
def edges(self, v):
"""Get the adjancent list for a vertex."""
return sorted(self.adj[v]) if v in self else ()
def edges_to(self, v):
"""Get the all the vertex that point to v."""
return sorted(u for u in self.adj if v in self.adj[u])
def cycles(self):
"""Detect cycles using Tarjan algorithm."""
index = [0]
path = []
cycles = []
v_index = {}
v_lowlink = {}
def scc(node, v):
v_index[v], v_lowlink[v] = index[0], index[0]
index[0] += 1
path.append(node)
for succ in self.adj.get(node, []):
w = self[succ]
if w not in v_index:
scc(succ, w)
v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
elif succ in path:
v_lowlink[v] = min(v_lowlink[v], v_index[w])
if v_index[v] == v_lowlink[v]:
i = path.index(node)
path[:], cycle = path[:i], frozenset(path[i:])
if len(cycle) > 1:
cycles.append(cycle)
for node in sorted(self):
v = self[node]
if not getattr(v, 'index', 0):
scc(node, v)
return frozenset(cycles)
class Package(object):
"""Simple package container. Used in a graph as a vertex."""
def __init__(self, pkg=None, src=None, deps=None, subs=None,
element=None):
self.pkg = pkg
self.src = src
self.deps = deps
self.subs = subs
if element:
self.load(element)
def load(self, element):
"""Load a node from a ElementTree package XML element"""
self.pkg = element.attrib['name']
self.src = [e.text for e in element.findall('source')]
assert len(self.src) == 1, 'There are more that one source packages in the graph'
self.src = self.src[0]
self.deps = set(e.text for e in element.findall('pkgdep'))
self.subs = set(e.text for e in element.findall('subpkg'))
def __repr__(self):
return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg,
self.src,
self.deps,
self.subs)
class CycleDetector(object):
"""Class to detect cycles in an OBS project."""
def __init__(self, apiurl):
self.apiurl = apiurl
# Store packages prevoiusly ignored. Don't pollute the screen.
self._ignore_packages = set()
def _builddepinfo(self, project, repository, arch):
root = None
try:
# print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
url = makeurl(self.apiurl, ['build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
root = http_GET(url).read()
except HTTPError as e:
print('ERROR in URL %s [%s]' % (url, e))
return root
def _get_builddepinfo_graph(self, project, repository, arch):
"""Generate the buildepinfo graph for a given architecture."""
# Note, by default generate the graph for all Factory /
# 13/2. If you only need the base packages you can use:
# project = 'Base:System'
# repository = 'openSUSE_Factory'
root = ET.fromstring(self._builddepinfo(project, repository, arch))
# Reset the subpackages dict here, so for every graph is a
# different object.
packages = [Package(element=e) for e in root.findall('package')]
graph = Graph()
graph.add_nodes_from((p.pkg, p) for p in packages)
subpkgs = {} # Given a subpackage, recover the source package
for p in packages:
# Check for packages that provides the same subpackage
for subpkg in p.subs:
if subpkg in subpkgs:
# print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
pass
else:
subpkgs[subpkg] = p.pkg
for p in packages:
# Calculate the missing deps
deps = p.deps
missing = set(deps) - set(subpkgs)
if missing:
if p.pkg not in self._ignore_packages:
# print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
self._ignore_packages.add(p.pkg)
continue
graph.add_edges_from((p.pkg, subpkgs[d]) for d in deps)
# Store the subpkgs dict in the graph. It will be used later.
graph.subpkgs = subpkgs
return graph
def cycles(self, override_pair, overridden_pair, arch):
"""Detect cycles in a specific repository."""
# Detect cycles - We create the full graph from _builddepinfo.
project_graph = self._get_builddepinfo_graph(overridden_pair[0], overridden_pair[1], arch)
current_graph = self._get_builddepinfo_graph(override_pair[0], override_pair[1], arch)
# Sometimes, new cycles have only new edges, but not new
# packages. We need to inform about this, so this can become
# a warning instead of an error.
#
# To do that, we store in `project_cycles_pkgs` all the
# project (i.e Factory) cycles as a set of packages, so we can
# check if the new cycle (also as a set of packages) is
# included here.
project_cycles = project_graph.cycles()
project_cycles_pkgs = [set(cycle) for cycle in project_cycles]
for cycle in current_graph.cycles():
if cycle not in project_cycles:
project_edges = set((u, v) for u in cycle for v in project_graph.edges(u) if v in cycle)
current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
current_pkgs = set(cycle)
yield (cycle,
sorted(current_edges - project_edges),
not any(current_pkgs.issubset(cpkgs) for cpkgs in project_cycles_pkgs))

View File

@ -18,6 +18,7 @@ from osclib.cache_manager import CacheManager
from osclib.conf import Config from osclib.conf import Config
from osclib.conf import str2bool from osclib.conf import str2bool
from osclib.core import BINARY_REGEX from osclib.core import BINARY_REGEX
from osclib.core import builddepinfo
from osclib.core import depends_on from osclib.core import depends_on
from osclib.core import devel_project_fallback from osclib.core import devel_project_fallback
from osclib.core import fileinfo_ext_all from osclib.core import fileinfo_ext_all
@ -32,7 +33,6 @@ from osclib.core import repositories_states
from osclib.core import repository_arch_state from osclib.core import repository_arch_state
from osclib.core import repositories_published from osclib.core import repositories_published
from osclib.core import target_archs from osclib.core import target_archs
from osclib.cycle import CycleDetector
from osclib.memoize import memoize from osclib.memoize import memoize
from osclib.util import sha1_short from osclib.util import sha1_short
@ -55,7 +55,6 @@ class RepoChecker(ReviewBot.ReviewBot):
self.comment_handler = True self.comment_handler = True
# RepoChecker options. # RepoChecker options.
self.skip_cycle = False
self.force = False self.force = False
def project_only(self, project, post_comments=False): def project_only(self, project, post_comments=False):
@ -278,52 +277,27 @@ class RepoChecker(ReviewBot.ReviewBot):
if section: if section:
yield InstallSection(section, text) yield InstallSection(section, text)
@memoize(ttl=60, session=True) def cycle_check(self, project, repository, arch, cycle_packages):
def cycle_check_skip(self, project): self.logger.info('cycle check: start %s/%s/%s' % (project, repository, arch))
if self.skip_cycle:
return True
# Look for skip-cycle comment command.
comments = self.comment_api.get_comments(project_name=project)
users = self.request_override_check_users(project)
for _, who in self.comment_api.command_find(
comments, self.review_user, 'skip-cycle', users):
self.logger.debug('comment command: skip-cycle by {}'.format(who))
return True
return False
def cycle_check(self, override_pair, overridden_pair, arch):
if self.cycle_check_skip(override_pair[0]):
self.logger.info('cycle check: skip due to --skip-cycle or comment command')
return CheckResult(True, None)
self.logger.info('cycle check: start')
comment = [] comment = []
first = True
cycle_detector = CycleDetector(self.apiurl)
for index, (cycle, new_edges, new_packages) in enumerate(
cycle_detector.cycles(override_pair, overridden_pair, arch), start=1):
if not new_packages: allowed_cycles = []
continue if cycle_packages:
for comma_list in cycle_packages.split(';'):
allowed_cycles.append(comma_list.split(','))
if first: depinfo = builddepinfo(self.apiurl, project, repository, arch, order = False)
comment.append('### new [cycle(s)](/project/repository_state/{}/{})\n'.format( for cycle in depinfo.findall('cycle'):
override_pair[0], override_pair[1])) for package in cycle.findall('package'):
first = False package = package.text
allowed = False
# New package involved in cycle, build comment. for acycle in allowed_cycles:
comment.append('- #{}: {} package cycle, {} new edges'.format( if package in acycle:
index, len(cycle), len(new_edges))) allowed = True
break
comment.append(' - cycle') if not allowed:
for package in sorted(cycle): cycled = [p.text for p in cycle.findall('package')]
comment.append(' - {}'.format(package)) comment.append('Package {} appears in cycle {}'.format(package, '/'.join(cycled)))
comment.append(' - new edges')
for edge in sorted(new_edges):
comment.append(' - ({}, {})'.format(edge[0], edge[1]))
if len(comment): if len(comment):
# New cycles, post comment. # New cycles, post comment.
@ -375,7 +349,7 @@ class RepoChecker(ReviewBot.ReviewBot):
return None return None
@memoize(session=True) @memoize(session=True)
def repository_check(self, repository_pairs, state_hash, simulate_merge, whitelist=None, arch_whitelist=None, post_comments=False): def repository_check(self, repository_pairs, state_hash, simulate_merge, whitelist=None, arch_whitelist=None, post_comments=False, cycle_packages=None):
comment = [] comment = []
project, repository = repository_pairs[0] project, repository = repository_pairs[0]
self.logger.info('checking {}/{}@{}[{}]'.format( self.logger.info('checking {}/{}@{}[{}]'.format(
@ -438,7 +412,7 @@ class RepoChecker(ReviewBot.ReviewBot):
whitelist = self.binary_whitelist(repository_pairs[0], repository_pairs[1], arch) whitelist = self.binary_whitelist(repository_pairs[0], repository_pairs[1], arch)
results = { results = {
'cycle': self.cycle_check(repository_pairs[0], repository_pairs[1], arch), 'cycle': self.cycle_check(repository_pairs[0][0], repository_pairs[0][1], arch, cycle_packages),
'install': self.install_check( 'install': self.install_check(
repository_pairs[1], arch, directories, ignore, whitelist), repository_pairs[1], arch, directories, ignore, whitelist),
} }
@ -576,6 +550,7 @@ class RepoChecker(ReviewBot.ReviewBot):
config = Config.get(self.apiurl, action.tgt_project) config = Config.get(self.apiurl, action.tgt_project)
staging = config.get('staging') staging = config.get('staging')
arch_whitelist = config.get('repo_checker-arch-whitelist') arch_whitelist = config.get('repo_checker-arch-whitelist')
cycle_packages = config.get('repo_checker-allowed-in-cycles')
if staging: if staging:
api = self.staging_api(staging) api = self.staging_api(staging)
if not api.is_adi_project(repository_pairs[0][0]): if not api.is_adi_project(repository_pairs[0][0]):
@ -585,7 +560,10 @@ class RepoChecker(ReviewBot.ReviewBot):
whitelist = config.get('repo_checker-binary-whitelist-ring', '').split(' ') whitelist = config.get('repo_checker-binary-whitelist-ring', '').split(' ')
state_hash = self.repository_state(repository_pairs, True) state_hash = self.repository_state(repository_pairs, True)
if not self.repository_check(repository_pairs, state_hash, True, arch_whitelist=arch_whitelist, whitelist=whitelist): if not self.repository_check(repository_pairs, state_hash, True,
arch_whitelist=arch_whitelist,
whitelist=whitelist,
cycle_packages=cycle_packages):
return None return None
self.review_messages['accepted'] = 'cycle and install check passed' self.review_messages['accepted'] = 'cycle and install check passed'
@ -658,7 +636,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
def get_optparser(self): def get_optparser(self):
parser = ReviewBot.CommandLineInterface.get_optparser(self) parser = ReviewBot.CommandLineInterface.get_optparser(self)
parser.add_option('--skip-cycle', action='store_true', help='skip cycle check')
parser.add_option('--force', action='store_true', help='force review even if project is not ready') parser.add_option('--force', action='store_true', help='force review even if project is not ready')
return parser return parser
@ -666,9 +643,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
def setup_checker(self): def setup_checker(self):
bot = ReviewBot.CommandLineInterface.setup_checker(self) bot = ReviewBot.CommandLineInterface.setup_checker(self)
if self.options.skip_cycle:
bot.skip_cycle = self.options.skip_cycle
bot.force = self.options.force bot.force = self.options.force
return bot return bot
@ -678,6 +652,6 @@ class CommandLineInterface(ReviewBot.CommandLineInterface):
self.checker.check_requests() # Needed to properly init ReviewBot. self.checker.check_requests() # Needed to properly init ReviewBot.
self.checker.project_only(project, opts.post_comments) self.checker.project_only(project, opts.post_comments)
if __name__ == "__main__": if __name__ == '__main__':
app = CommandLineInterface() app = CommandLineInterface()
sys.exit(app.main()) sys.exit(app.main())