diff --git a/osc-check_repo.py b/osc-check_repo.py
index 7d85ed2a..e32d07bb 100644
--- a/osc-check_repo.py
+++ b/osc-check_repo.py
@@ -5,21 +5,14 @@
 # Copy this script to ~/.osc-plugins/ or /var/lib/osc-plugins .
 # Then try to run 'osc check_repo --help' to see the usage.
 
-import cPickle
-from copy import deepcopy
-from datetime import datetime
-from functools import wraps
-import fcntl
 import os
 import re
-import shelve
 import shutil
 import subprocess
 import tempfile
-import sys
-
 from urllib import quote_plus
 import urllib2
+import sys
 
 from xml.etree import cElementTree as ET
 
 from osc import oscerr
@@ -35,8 +28,10 @@ from osc.core import Request
 # Expand sys.path to search modules inside the pluging directory
 _plugin_dir = os.path.expanduser('~/.osc-plugins')
 sys.path.append(_plugin_dir)
-from osclib.graph import Graph
-from osclib.stagingapi import StagingAPI
+from osclib.checkrepo import CheckRepo
+from osclib.cycle import CycleDetector
+from osclib.memoize import memoize, CACHEDIR
+
 
 # Directory where download binary packages.
 DOWNLOADS = os.path.expanduser('~/co/downloads')
@@ -45,170 +40,14 @@ DOWNLOADS = os.path.expanduser('~/co/downloads')
 # XXX - Ugly Hack. Because the way that osc import plugings we need to
 # declare some functions and objects used in the decorator as global
 #
-global cPickle
-global deepcopy
-global datetime
-global fcntl
-global shelve
 global tempfile
 global wraps
-global Package_
-
-global memoize
-
 global build
 global last_build_success
-global builddepinfo
 global jobhistory
 
 
-class Package_(object):
-    """Simple package container. Used in a graph as a vertex."""
-
-    def __init__(self, pkg=None, src=None, deps=None, subs=None, element=None):
-        self.pkg = pkg
-        self.src = src
-        self.deps = deps
-        self.subs = subs
-        if element:
-            self.load(element)
-
-    def load(self, element):
-        """Load a node from a ElementTree package XML element"""
-        self.pkg = element.attrib['name']
-        self.src = [e.text for e in element.findall('source')]
-        assert len(self.src) == 1, 'There are more that one source packages in the graph'
-        self.src = self.src[0]
-        self.deps = set(e.text for e in element.findall('pkgdep'))
-        self.subs = set(e.text for e in element.findall('subpkg'))
-
-    def __repr__(self):
-        return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg, self.src, self.deps, self.subs)
-
-
-TMPDIR = '/var/cache/repo-checker'  # Where the cache files are stored
-
-
-def memoize(ttl=None):
-    """Decorator function to implement a persistent cache.
-
-    >>> @memoize()
-    ... def test_func(a):
-    ...     return a
-
-    Internally, the memoized function has a cache:
-
-    >>> cache = [c.cell_contents for c in test_func.func_closure if 'sync' in dir(c.cell_contents)][0]
-    >>> 'sync' in dir(cache)
-    True
-
-    There is a limit of the size of the cache
-
-    >>> for k in cache:
-    ...     del cache[k]
-    >>> len(cache)
-    0
-
-    >>> for i in range(4095):
-    ...     test_func(i)
-    ... len(cache)
-    4095
-
-    >>> test_func(0)
-    0
-
-    >>> len(cache)
-    4095
-
-    >>> test_func(4095)
-    4095
-
-    >>> len(cache)
-    3072
-
-    >>> test_func(0)
-    0
-
-    >>> len(cache)
-    3073
-
-    >>> from datetime import timedelta
-    >>> k = [k for k in cache if cPickle.loads(k) == ((0,), {})][0]
-    >>> t, v = cache[k]
-    >>> t = t - timedelta(days=10)
-    >>> cache[k] = (t, v)
-    >>> test_func(0)
-    0
-    >>> t2, v = cache[k]
-    >>> t != t2
-    True
-
-    """
-
-    # Configuration variables
-    SLOTS = 4096  # Number of slots in the cache file
-    NCLEAN = 1024  # Number of slots to remove when limit reached
-    TIMEOUT = 60*60*2  # Time to live for every cache slot (seconds)
-
-    def _memoize(f):
-        # Implement a POSIX lock / unlock extension for shelves. Inspired
-        # on ActiveState Code recipe #576591
-        def _lock(filename):
-            lckfile = open(filename + '.lck', 'w')
-            fcntl.flock(lckfile.fileno(), fcntl.LOCK_EX)
-            return lckfile
-
-        def _unlock(lckfile):
-            fcntl.flock(lckfile.fileno(), fcntl.LOCK_UN)
-            lckfile.close()
-
-        def _open_cache(cache_name):
-            lckfile = _lock(cache_name)
-            cache = shelve.open(cache_name, protocol=-1)
-            # Store a reference to the lckfile to avoid to be closed by gc
-            cache.lckfile = lckfile
-            return cache
-
-        def _close_cache(cache):
-            cache.close()
-            _unlock(cache.lckfile)
-
-        def _clean_cache(cache):
-            len_cache = len(cache)
-            if len_cache >= SLOTS:
-                nclean = NCLEAN + len_cache - SLOTS
-                keys_to_delete = sorted(cache, key=lambda k: cache[k][0])[:nclean]
-                for key in keys_to_delete:
-                    del cache[key]
-
-        @wraps(f)
-        def _f(*args, **kwargs):
-            def total_seconds(td):
-                return (td.microseconds + (td.seconds + td.days * 24 * 3600.) * 10**6) / 10**6
-            now = datetime.now()
-            key = cPickle.dumps((args, kwargs), protocol=-1)
-            updated = False
-            cache = _open_cache(cache_name)
-            if key in cache:
-                timestamp, value = cache[key]
-                updated = True if total_seconds(now-timestamp) < ttl else False
-            if not updated:
-                value = f(*args, **kwargs)
-                cache[key] = (now, value)
-            _clean_cache(cache)
-            _close_cache(cache)
-            return value
-
-        cache_dir = os.path.expanduser(TMPDIR)
-        if not os.path.exists(cache_dir):
-            os.makedirs(cache_dir)
-        cache_name = os.path.join(cache_dir, f.__name__)
-        return _f
-
-    ttl = ttl if ttl else TIMEOUT
-    return _memoize
-
-
 @memoize()
 def build(apiurl, project, repo, arch, package):
     root = None
@@ -236,18 +75,6 @@ def last_build_success(apiurl, src_project, tgt_project, src_package, rev):
     return root
 
 
-@memoize(ttl=60*60*6)
-def builddepinfo(apiurl, project, repository, arch):
-    root = None
-    try:
-        print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
-        url = makeurl(apiurl, ['/build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
-        root = http_GET(url).read()
-    except urllib2.HTTPError, e:
-        print('ERROR in URL %s [%s]' % (url, e))
-    return root
-
-
 def get_project_repos(apiurl, src_project, tgt_project, src_package, rev):
     """Read the repositories of the project from _meta."""
     # XXX TODO - Shitty logic here. A better proposal is refactorize
@@ -290,7 +117,7 @@ def old_md5(apiurl, src_project, tgt_project, src_package, rev):
                 break
         if status not in ('succeeded', 'outdated'):
             continue
-        
+
         url = makeurl(apiurl, ['build', src_project, repository, arch, '_jobhistory'], query=query)
         try:
@@ -656,7 +483,7 @@ def _check_repo_download(self, p, opts):
             todownload.append(('x86_64', fn[0], fn[3]))
 
         # now fetch -32bit packs
-        #for fn in self._check_repo_repo_list(p.sproject, repo, 'i586', p.spackage, opts):
+        # for fn in self._check_repo_repo_list(p.sproject, repo, 'i586', p.spackage, opts):
         #     if fn[2] == 'x86_64':
         #         todownload.append(('i586', fn[0], fn[3]))
@@ -697,80 +524,6 @@ def _get_buildinfo(self, opts, prj, repo, arch, pkg):
     return [e.attrib['name'] for e in root.findall('bdep')]
 
 
-def _get_builddepinfo(self, opts, prj, repo, arch, pkg):
-    """Get the builddep info for a single package"""
-    root = ET.fromstring(builddepinfo(opts.apiurl, prj, repo, arch))
-    packages = [Package_(element=e) for e in root.findall('package')]
-    package = [p for p in packages if p.pkg == pkg]
-    return package[0] if package else None
-
-
-# Store packages prevoiusly ignored. Don't pollute the screen.
-global _ignore_packages
-_ignore_packages = set()
-
-
-def _get_builddepinfo_graph(self, opts, project='openSUSE:Factory', repository='standard', arch='x86_64'):
-    """Generate the buildepinfo graph for a given architecture."""
-
-    _IGNORE_PREFIX = ('texlive-', 'master-boot-code')
-
-    # Note, by default generate the graph for all Factory. If you only
-    # need the base packages you can use:
-    # project = 'Base:System'
-    # repository = 'openSUSE_Factory'
-
-    root = ET.fromstring(builddepinfo(opts.apiurl, project, repository, arch))
-    # Reset the subpackages dict here, so for every graph is a
-    # different object.
-    packages = [Package_(element=e) for e in root.findall('package')]
-
-    # XXX - Ugly Exception. We need to ignore branding packages and
-    # packages that one of his dependencies do not exist. Also ignore
-    # preinstall images.
-    packages = [p for p in packages if not ('branding' in p.pkg or p.pkg.startswith('preinstallimage-'))]
-
-    graph = Graph()
-    graph.add_nodes_from((p.pkg, p) for p in packages)
-
-    subpkgs = {}  # Given a subpackage, recover the source package
-    for p in packages:
-        # Check for packages that provides the same subpackage
-        for subpkg in p.subs:
-            if subpkg in subpkgs:
-                # print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
-                pass
-            else:
-                subpkgs[subpkg] = p.pkg
-
-    for p in packages:
-        # Calculate the missing deps
-        deps = [d for d in p.deps if 'branding' not in d]
-        missing = [d for d in deps if not d.startswith(_IGNORE_PREFIX) and d not in subpkgs]
-        if missing:
-            if p.pkg not in _ignore_packages:
-                # print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
-                _ignore_packages.add(p.pkg)
-            continue
-
-        # XXX - Ugly Hack. Subpagackes for texlive are not correctly
-        # generated. If the dependency starts with texlive- prefix,
-        # assume that the correct source package is texlive.
-        graph.add_edges_from((p.pkg, subpkgs[d] if not d.startswith('texlive-') else 'texlive')
-                             for d in deps if not d.startswith('master-boot-code'))
-
-    # Store the subpkgs dict in the graph. It will be used later.
-    graph.subpkgs = subpkgs
-    return graph
-
-
-def _get_builddepinfo_cycles(self, opts, package='openSUSE:Factory', repository='standard', arch='x86_64'):
-    """Generate the buildepinfo cycle list for a given architecture."""
-    root = ET.fromstring(builddepinfo(opts.apiurl, package, repository, arch))
-    return frozenset(frozenset(e.text for e in cycle.findall('package'))
-                     for cycle in root.findall('cycle'))
-
-
 def _check_repo_group(self, id_, reqs, opts):
     print '\nCheck group', reqs
     if not all(self._check_repo_buildsuccess(r, opts) for r in reqs):
@@ -811,52 +564,10 @@ def _check_repo_group(self, id_, reqs, opts):
                 p.updated = True
         toignore.update(i)
 
-    # Detect cycles - We create the full graph from _builddepinfo.
-    for arch in ('x86_64',):
-        factory_graph = self._get_builddepinfo_graph(opts, arch=arch)
-        factory_cycles = factory_graph.cycles()
-        # This graph will be updated for every request
-        current_graph = deepcopy(factory_graph)
-
-        subpkgs = current_graph.subpkgs
-
-        # Recover all packages at once, ignoring some packages that
-        # can't be found in x86_64 architecture.
-        #
-        # The first filter is to remove some packages that do not have
-        # `goodrepos`. Thouse packages are usually marks as 'p.update
-        # = True' (meaning that they are declined or there is a new
-        # updated review.
-        all_packages = [self._get_builddepinfo(opts, p.sproject, p.goodrepos[0], arch, p.spackage)
-                        for p in packs if not p.updated]
-        all_packages = [pkg for pkg in all_packages if pkg]
-
-        subpkgs.update(dict((p, pkg.pkg) for pkg in all_packages for p in pkg.subs))
-
-        for pkg in all_packages:
-            # Update the current graph and see if we have different cycles
-            edges_to = ()
-            if pkg.pkg in current_graph:
-                current_graph[pkg.pkg] = pkg
-                current_graph.remove_edges_from(set((pkg.pkg, p) for p in current_graph.edges(pkg.pkg)))
-                edges_to = current_graph.edges_to(pkg.pkg)
-                current_graph.remove_edges_from(set((p, pkg.pkg) for p in edges_to))
-            else:
-                current_graph.add_node(pkg.pkg, pkg)
-            current_graph.add_edges_from((pkg.pkg, subpkgs[p]) for p in pkg.deps if p in subpkgs)
-            current_graph.add_edges_from((p, pkg.pkg) for p in edges_to
-                                         if pkg.pkg in set(subpkgs[sp] for sp in current_graph[p].deps if sp in subpkgs))
-
-        for cycle in current_graph.cycles():
-            if cycle not in factory_cycles:
-                print
-                print 'New cycle detected:', sorted(cycle)
-                factory_edges = set((u, v) for u in cycle for v in factory_graph.edges(u) if v in cycle)
-                current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
-                print 'New edges:', sorted(current_edges - factory_edges)
-                # Mark all packages as updated, to avoid to be accepted
-                for p in reqs:
-                    p.updated = True
+    # Detect cycles in the current Factory graph after updating the
+    # links with the current list of requests.
+    cycle_detector = CycleDetector(opts.apiurl)
+    cycle_detector.cycles(packs)
 
     for p in reqs:
         smissing = []
@@ -982,31 +693,20 @@ def do_check_repo(self, subcmd, opts, *args):
     """
 
     opts.mode = ''
-    opts.verbose = False
 
     opts.apiurl = self.get_api_url()
-    api = StagingAPI(opts.apiurl)
 
-    # grouped = { id: staging, }
-    opts.grouped = {}
-    for prj in api.get_staging_projects():
-        meta = api.get_prj_pseudometa(prj)
-        for req in meta['requests']:
-            opts.grouped[req['id']] = prj
-        for req in api.list_requests_in_prj(prj):
-            opts.grouped[req] = prj
+    checkrepo = CheckRepo(opts.apiurl)
 
-    # groups = { staging: [ids,], }
-    opts.groups = {}
-    for req, prj in opts.grouped.items():
-        group = opts.groups.get(prj, [])
-        group.append(req)
-        opts.groups[prj] = group
+    # XXX TODO - Remove this once all access to opts.group[s|ed] comes
+    # from checkrepo.
+    opts.grouped = checkrepo.grouped
+    opts.groups = checkrepo.groups
 
     if opts.skip:
         if not len(args):
-            raise oscerr.WrongArgs('Please give, if you want to skip a review specify a SRID')
+            raise oscerr.WrongArgs('Provide #IDs to skip.')
+
         for id_ in args:
             msg = 'skip review'
             print 'ACCEPTED', msg
@@ -1040,7 +740,7 @@ def do_check_repo(self, subcmd, opts, *args):
         groups[p.group] = a
 
     self.repocheckerdir = os.path.dirname(os.path.realpath(os.path.expanduser('~/.osc-plugins/osc-check_repo.py')))
-    self.repodir = "%s/repo-%s-%s-x86_64" % (TMPDIR, 'openSUSE:Factory', 'standard')
+    self.repodir = "%s/repo-%s-%s-x86_64" % (CACHEDIR, 'openSUSE:Factory', 'standard')
     if not os.path.exists(self.repodir):
         os.mkdir(self.repodir)
     civs = 'LC_ALL=C perl %s/bs_mirrorfull --nodebug https://build.opensuse.org/build/%s/%s/x86_64 %s' % (
diff --git a/osclib/checkrepo.py b/osclib/checkrepo.py
new file mode 100644
index 00000000..80c1dae0
--- /dev/null
+++ b/osclib/checkrepo.py
@@ -0,0 +1,72 @@
+# Copyright (C) 2014 SUSE Linux Products GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import urllib2
+from xml.etree import cElementTree as ET
+
+from osc.core import http_POST
+from osc.core import makeurl
+
+from osclib.stagingapi import StagingAPI
+
+
+class CheckRepo(object):
+
+    def __init__(self, apiurl):
+        """CheckRepo constructor."""
+        self.apiurl = apiurl
+        self.staging = StagingAPI(apiurl)
+
+        # grouped = { id: staging, }
+        self.grouped = {}
+        # groups = { staging: [ids,], }
+        self.groups = {}
+        self._staging()
+
+    def _staging(self):
+        """
+        Preload the groups of related requests associated with the same
+        staging project.
+        """
+        for project in self.staging.get_staging_projects():
+            # Get all the request identifiers for the project
+            requests = self.staging.get_prj_pseudometa(project)['requests']
+            requests = [req['id'] for req in requests]
+
+            # Note: Originally we also recovered the requests returned
+            # by list_requests_in_prj(). I guess that if the staging
+            # project is working properly, this method does not add
+            # any new requests to the list.
+            if requests:
+                self.groups[project] = requests
+                self.grouped.update({req: project for req in requests})
+
+    def change_review_state(self, request_id, newstate, message=''):
+        """Based on osc/osc/core.py. Fixed 'by_user'."""
+        query = {
+            'cmd': 'changereviewstate',
+            'newstate': newstate,
+            'by_user': 'factory-repo-checker',
+        }
+
+        code = 404
+        url = makeurl(self.apiurl, ['request', str(request_id)], query=query)
+        try:
+            root = ET.parse(http_POST(url, data=message)).getroot()
+            code = root.attrib['code']
+        except urllib2.HTTPError, e:
+            print('ERROR in URL %s [%s]' % (url, e))
+        return code
diff --git a/osclib/cycle.py b/osclib/cycle.py
new file mode 100644
index 00000000..0811f49f
--- /dev/null
+++ b/osclib/cycle.py
@@ -0,0 +1,279 @@
+# Copyright (C) 2014 SUSE Linux Products GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from copy import deepcopy
+import urllib2
+from xml.etree import cElementTree as ET
+
+from osc.core import http_GET
+from osc.core import makeurl
+
+from .memoize import memoize
+
+
+class Graph(dict):
+    """Graph object. Inspired by the NetworkX data model."""
+
+    def __init__(self):
+        """Initialize an empty graph."""
+        # The nodes are stored in the Graph dict itself, but the
+        # adjacency list is stored as an attribute.
+        self.adj = {}
+
+    def add_node(self, name, value):
+        """Add a node in the graph."""
+        self[name] = value
+        if name not in self.adj:
+            self.adj[name] = set()
+
+    def add_nodes_from(self, nodes_and_values):
+        """Add multiple nodes."""
+        for node, value in nodes_and_values:
+            self.add_node(node, value)
+
+    def add_edge(self, u, v, directed=True):
+        """Add the edge u -> v, and v -> u if not directed."""
+        self.adj[u].add(v)
+        if not directed:
+            self.adj[v].add(u)
+
+    def add_edges_from(self, edges, directed=True):
+        """Add the edges from an iterator."""
+        for u, v in edges:
+            self.add_edge(u, v, directed)
+
+    def remove_edge(self, u, v, directed=True):
+        """Remove the edge u -> v, and v -> u if not directed."""
+        try:
+            self.adj[u].remove(v)
+        except KeyError:
+            pass
+        if not directed:
+            try:
+                self.adj[v].remove(u)
+            except KeyError:
+                pass
+
+    def remove_edges_from(self, edges, directed=True):
+        """Remove the edges from an iterator."""
+        for u, v in edges:
+            self.remove_edge(u, v, directed)
+
+    def edges(self, v):
+        """Get the adjacency list for a vertex."""
+        return sorted(self.adj[v]) if v in self else ()
+
+    def edges_to(self, v):
+        """Get all the vertices that point to v."""
+        return sorted(u for u in self.adj if v in self.adj[u])
+
+    def cycles(self):
+        """Detect cycles using Tarjan's algorithm."""
+        index = [0]
+        path = []
+        cycles = []
+
+        v_index = {}
+        v_lowlink = {}
+
+        def scc(node, v):
+            v_index[v], v_lowlink[v] = index[0], index[0]
+            index[0] += 1
+            path.append(node)
+
+            for succ in self.adj.get(node, []):
+                w = self[succ]
+                if w not in v_index:
+                    scc(succ, w)
+                    v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
+                elif succ in path:
+                    v_lowlink[v] = min(v_lowlink[v], v_index[w])
+
+            if v_index[v] == v_lowlink[v]:
+                i = path.index(node)
+                path[:], cycle = path[:i], frozenset(path[i:])
+                if len(cycle) > 1:
+                    cycles.append(cycle)
+
+        for node in sorted(self):
+            v = self[node]
+            if v not in v_index:  # only start from unvisited vertices
+                scc(node, v)
+        return frozenset(cycles)
+
+
+class Package(object):
+    """Simple package container. Used in a graph as a vertex."""
+
+    def __init__(self, pkg=None, src=None, deps=None, subs=None,
+                 element=None):
+        self.pkg = pkg
+        self.src = src
+        self.deps = deps
+        self.subs = subs
+        if element:
+            self.load(element)
+
+    def load(self, element):
+        """Load a node from an ElementTree package XML element."""
+        self.pkg = element.attrib['name']
+        self.src = [e.text for e in element.findall('source')]
+        assert len(self.src) == 1, 'There is more than one source package in the graph'
+        self.src = self.src[0]
+        self.deps = set(e.text for e in element.findall('pkgdep'))
+        self.subs = set(e.text for e in element.findall('subpkg'))
+
+    def __repr__(self):
+        return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg,
+                                                          self.src,
+                                                          self.deps,
+                                                          self.subs)
+
+
+class CycleDetector(object):
+    """Class to detect cycles in Factory."""
+
+    def __init__(self, apiurl):
+        self.apiurl = apiurl
+        # Store packages previously ignored. Don't pollute the screen.
+        self._ignore_packages = set()
+
+    @memoize(ttl=60*60*6)
+    def _builddepinfo(self, project, repository, arch):
+        root = None
+        try:
+            print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
+            url = makeurl(self.apiurl, ['/build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
+            root = http_GET(url).read()
+        except urllib2.HTTPError, e:
+            print('ERROR in URL %s [%s]' % (url, e))
+        return root
+
+    def _get_builddepinfo(self, project, repository, arch, package):
+        """Get the builddep info for a single package."""
+        root = ET.fromstring(self._builddepinfo(project, repository, arch))
+        packages = [Package(element=e) for e in root.findall('package')]
+        package = [p for p in packages if p.pkg == package]
+        return package[0] if package else None
+
+    def _get_builddepinfo_graph(self, project, repository, arch):
+        """Generate the builddepinfo graph for a given architecture."""
+
+        _IGNORE_PREFIX = ('texlive-', 'master-boot-code')
+
+        # Note, by default generate the graph for all Factory. If you only
+        # need the base packages you can use:
+        # project = 'Base:System'
+        # repository = 'openSUSE_Factory'
+
+        root = ET.fromstring(self._builddepinfo(project, repository, arch))
+        # Reset the subpackages dict here, so that every graph gets a
+        # different object.
+        packages = [Package(element=e) for e in root.findall('package')]
+
+        # XXX - Ugly Exception. We need to ignore branding packages and
+        # packages for which one of the dependencies does not exist. Also
+        # ignore preinstall images.
+        packages = [p for p in packages if not ('branding' in p.pkg or p.pkg.startswith('preinstallimage-'))]
+
+        graph = Graph()
+        graph.add_nodes_from((p.pkg, p) for p in packages)
+
+        subpkgs = {}  # Given a subpackage, recover the source package
+        for p in packages:
+            # Check for packages that provide the same subpackage
+            for subpkg in p.subs:
+                if subpkg in subpkgs:
+                    # print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
+                    pass
+                else:
+                    subpkgs[subpkg] = p.pkg
+
+        for p in packages:
+            # Calculate the missing deps
+            deps = [d for d in p.deps if 'branding' not in d]
+            missing = [d for d in deps if not d.startswith(_IGNORE_PREFIX) and d not in subpkgs]
+            if missing:
+                if p.pkg not in self._ignore_packages:
+                    # print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
+                    self._ignore_packages.add(p.pkg)
+                continue
+
+            # XXX - Ugly Hack. Subpackages for texlive are not correctly
+            # generated. If the dependency starts with the texlive- prefix,
+            # assume that the correct source package is texlive.
+            graph.add_edges_from((p.pkg, subpkgs[d] if not d.startswith('texlive-') else 'texlive')
+                                 for d in deps if not d.startswith('master-boot-code'))
+
+        # Store the subpkgs dict in the graph. It will be used later.
+        graph.subpkgs = subpkgs
+        return graph
+
+    def _get_builddepinfo_cycles(self, package, repository, arch):
+        """Generate the builddepinfo cycle list for a given architecture."""
+        root = ET.fromstring(self._builddepinfo(package, repository, arch))
+        return frozenset(frozenset(e.text for e in cycle.findall('package'))
+                         for cycle in root.findall('cycle'))
+
+    def cycles(self, packs, project='openSUSE:Factory', repository='standard', arch='x86_64'):
+        """Detect cycles in a specific repository."""
+
+        # Detect cycles - We create the full graph from _builddepinfo.
+        factory_graph = self._get_builddepinfo_graph(project, repository, arch)
+        factory_cycles = factory_graph.cycles()
+
+        # This graph will be updated for every request
+        current_graph = deepcopy(factory_graph)
+
+        subpkgs = current_graph.subpkgs
+
+        # Recover all packages at once, ignoring some packages that
+        # can't be found in x86_64 architecture.
+        #
+        # The first filter is to remove some packages that do not have
+        # `goodrepos`. Those packages are usually marked as 'p.updated
+        # = True' (meaning that they are declined or there is a new
+        # updated review).
+        all_packages = [self._get_builddepinfo(p.sproject, p.goodrepos[0], arch, p.spackage)
+                        for p in packs if not p.updated]
+        all_packages = [pkg for pkg in all_packages if pkg]
+
+        subpkgs.update(dict((p, pkg.pkg) for pkg in all_packages for p in pkg.subs))
+
+        for pkg in all_packages:
+            # Update the current graph and see if we have different cycles
+            edges_to = ()
+            if pkg.pkg in current_graph:
+                current_graph[pkg.pkg] = pkg
+                current_graph.remove_edges_from(set((pkg.pkg, p) for p in current_graph.edges(pkg.pkg)))
+                edges_to = current_graph.edges_to(pkg.pkg)
+                current_graph.remove_edges_from(set((p, pkg.pkg) for p in edges_to))
+            else:
+                current_graph.add_node(pkg.pkg, pkg)
+            current_graph.add_edges_from((pkg.pkg, subpkgs[p]) for p in pkg.deps if p in subpkgs)
+            current_graph.add_edges_from((p, pkg.pkg) for p in edges_to
+                                         if pkg.pkg in set(subpkgs[sp] for sp in current_graph[p].deps if sp in subpkgs))
+
+        for cycle in current_graph.cycles():
+            if cycle not in factory_cycles:
+                print
+                print 'New cycle detected:', sorted(cycle)
+                factory_edges = set((u, v) for u in cycle for v in factory_graph.edges(u) if v in cycle)
+                current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
+                print 'New edges:', sorted(current_edges - factory_edges)
+                # Mark all packages as updated, to avoid them being accepted
+                for p in packs:
+                    p.updated = True
diff --git a/osclib/graph.py b/osclib/graph.py
deleted file mode 100644
index 06474e63..00000000
--- a/osclib/graph.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright (C) 2014 SUSE Linux Products GmbH
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-class Graph(dict):
-    """Graph object. Inspired in NetworkX data model."""
-
-    def __init__(self):
-        """Initialize an empty graph."""
-        # The nodes are stored in the Graph dict itself, but the
-        # adjacent list is stored as an attribute.
-        self.adj = {}
-
-    def add_node(self, name, value):
-        """Add a node in the graph."""
-        self[name] = value
-        if name not in self.adj:
-            self.adj[name] = set()
-
-    def add_nodes_from(self, nodes_and_values):
-        """Add multiple nodes"""
-        for node, value in nodes_and_values:
-            self.add_node(node, value)
-
-    def add_edge(self, u, v, directed=True):
-        """Add the edge u -> v, an v -> u if not directed."""
-        self.adj[u].add(v)
-        if not directed:
-            self.adj[v].add(u)
-
-    def add_edges_from(self, edges, directed=True):
-        """Add the edges from an iterator."""
-        for u, v in edges:
-            self.add_edge(u, v, directed)
-
-    def remove_edge(self, u, v, directed=True):
-        """Remove the edge u -> v, an v -> u if not directed."""
-        try:
-            self.adj[u].remove(v)
-        except KeyError:
-            pass
-        if not directed:
-            try:
-                self.adj[v].remove(u)
-            except KeyError:
-                pass
-
-    def remove_edges_from(self, edges, directed=True):
-        """Remove the edges from an iterator."""
-        for u, v in edges:
-            self.remove_edge(u, v, directed)
-
-    def edges(self, v):
-        """Get the adjancent list for a vertex."""
-        return sorted(self.adj[v]) if v in self else ()
-
-    def edges_to(self, v):
-        """Get the all the vertex that point to v."""
-        return sorted(u for u in self.adj if v in self.adj[u])
-
-    def cycles(self):
-        """Detect cycles using Tarjan algorithm."""
-        index = [0]
-        path = []
-        cycles = []
-
-        v_index = {}
-        v_lowlink = {}
-
-        def scc(node, v):
-            v_index[v], v_lowlink[v] = index[0], index[0]
-            index[0] += 1
-            path.append(node)
-
-            for succ in self.adj.get(node, []):
-                w = self[succ]
-                if w not in v_index:
-                    scc(succ, w)
-                    v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
-                elif succ in path:
-                    v_lowlink[v] = min(v_lowlink[v], v_index[w])
-
-            if v_index[v] == v_lowlink[v]:
-                i = path.index(node)
-                path[:], cycle = path[:i], frozenset(path[i:])
-                if len(cycle) > 1:
-                    cycles.append(cycle)
-
-        for node in sorted(self):
-            v = self[node]
-            if not getattr(v, 'index', 0):
-                scc(node, v)
-        return frozenset(cycles)
diff --git a/osclib/memoize.py b/osclib/memoize.py
new file mode 100644
index 00000000..1dcbe400
--- /dev/null
+++ b/osclib/memoize.py
@@ -0,0 +1,149 @@
+# Copyright (C) 2014 SUSE Linux Products GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from datetime import datetime
+import fcntl
+from functools import wraps
+import os
+import shelve
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+# Where the cache files are stored
+CACHEDIR = '/var/cache/repo-checker'
+
+
+def memoize(ttl=None):
+    """Decorator function to implement a persistent cache.
+
+    >>> @memoize()
+    ... def test_func(a):
+    ...     return a
+
+    Internally, the memoized function has a cache:
+
+    >>> cache = [c.cell_contents for c in test_func.func_closure if 'sync' in dir(c.cell_contents)][0]
+    >>> 'sync' in dir(cache)
+    True
+
+    There is a limit on the size of the cache
+
+    >>> for k in cache:
+    ...     del cache[k]
+    >>> len(cache)
+    0
+
+    >>> for i in range(4095):
+    ...     test_func(i)
+    ... len(cache)
+    4095
+
+    >>> test_func(0)
+    0
+
+    >>> len(cache)
+    4095
+
+    >>> test_func(4095)
+    4095
+
+    >>> len(cache)
+    3072
+
+    >>> test_func(0)
+    0
+
+    >>> len(cache)
+    3073
+
+    >>> from datetime import timedelta
+    >>> k = [k for k in cache if pickle.loads(k) == ((0,), {})][0]
+    >>> t, v = cache[k]
+    >>> t = t - timedelta(days=10)
+    >>> cache[k] = (t, v)
+    >>> test_func(0)
+    0
+    >>> t2, v = cache[k]
+    >>> t != t2
+    True
+
+    """
+
+    # Configuration variables
+    SLOTS = 4096  # Number of slots in the cache file
+    NCLEAN = 1024  # Number of slots to remove when limit reached
+    TIMEOUT = 60*60*2  # Time to live for every cache slot (seconds)
+
+    def _memoize(fn):
+        # Implement a POSIX lock / unlock extension for shelves. Inspired
+        # by ActiveState Code recipe #576591
+        def _lock(filename):
+            lckfile = open(filename + '.lck', 'w')
+            fcntl.flock(lckfile.fileno(), fcntl.LOCK_EX)
+            return lckfile
+
+        def _unlock(lckfile):
+            fcntl.flock(lckfile.fileno(), fcntl.LOCK_UN)
+            lckfile.close()
+
+        def _open_cache(cache_name):
+            lckfile = _lock(cache_name)
+            cache = shelve.open(cache_name, protocol=-1)
+            # Store a reference to the lckfile to avoid it being closed by gc
+            cache.lckfile = lckfile
+            return cache
+
+        def _close_cache(cache):
+            cache.close()
+            _unlock(cache.lckfile)
+
+        def _clean_cache(cache):
+            len_cache = len(cache)
+            if len_cache >= SLOTS:
+                nclean = NCLEAN + len_cache - SLOTS
+                keys_to_delete = sorted(cache, key=lambda k: cache[k][0])[:nclean]
+                for key in keys_to_delete:
+                    del cache[key]
+
+        @wraps(fn)
+        def _fn(*args, **kwargs):
+            def total_seconds(td):
+                return (td.microseconds + (td.seconds + td.days * 24 * 3600.) * 10**6) / 10**6
+            now = datetime.now()
+            key = pickle.dumps((args, kwargs), protocol=-1)
+            updated = False
+            cache = _open_cache(cache_name)
+            if key in cache:
+                timestamp, value = cache[key]
+                updated = total_seconds(now - timestamp) < ttl
+            if not updated:
+                value = fn(*args, **kwargs)
+                cache[key] = (now, value)
+            _clean_cache(cache)
+            _close_cache(cache)
+            return value
+
+        cache_dir = os.path.expanduser(CACHEDIR)
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir)
+        cache_name = os.path.join(cache_dir, fn.__name__)
+        return _fn
+
+    ttl = ttl if ttl else TIMEOUT
+    return _memoize
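
For review purposes, a minimal sketch of how the Graph class that moves into osclib/cycle.py behaves. It assumes the `if v not in v_index` check used above, and the toy node names are invented; plain strings stand in for the Package values that CycleDetector uses:

# Toy run of osclib.cycle.Graph: a -> b -> c -> a forms a cycle,
# while c -> d does not. Node values only need to be hashable.
from osclib.cycle import Graph

graph = Graph()
graph.add_nodes_from((name, name.upper()) for name in 'abcd')
graph.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'a'), ('c', 'd')])

print graph.edges('c')     # ['a', 'd']
print graph.edges_to('a')  # ['c']
print graph.cycles()       # frozenset([frozenset(['a', 'b', 'c'])])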
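
Likewise, a sketch of how the relocated memoize() decorator is meant to be used. It assumes CACHEDIR is writable and that osclib is importable; expensive_lookup and its argument are invented for the example:

# Each memoized function gets its own shelve file under CACHEDIR,
# keyed by the pickled (args, kwargs) tuple and stamped with the
# time of computation; entries older than ttl are recomputed.
from osclib.memoize import memoize

@memoize(ttl=60 * 60)                 # cache results for one hour
def expensive_lookup(project):
    print 'computing for', project    # only runs on a cache miss
    return len(project)

expensive_lookup('openSUSE:Factory')  # miss: prints, stores in the shelve
expensive_lookup('openSUSE:Factory')  # hit within the hour: silent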
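
And the CheckRepo wiring, roughly as do_check_repo consumes it now. This sketch assumes a configured osc client with network access; the apiurl, request id, and message are invented. change_review_state returns the status code from the OBS response, or 404 after an HTTP error:

from osclib.checkrepo import CheckRepo

checkrepo = CheckRepo('https://api.opensuse.org')
# groups maps each staging project to the request ids preloaded
# from its pseudometa; grouped is the inverse, id -> staging.
for staging, request_ids in checkrepo.groups.items():
    print staging, request_ids

code = checkrepo.change_review_state(123456, 'accepted', message='Builds fine')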