More refactoring:
- Extract all the graph and cycle logic.
- Extract memoize.
- Start with CheckRepo.
This commit is contained in:
parent
75b211ff5a
commit
e96f85dc38
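
The refactoring below splits the monolithic plugin into three osclib modules. As a rough sketch of how they fit together (assuming an osc apiurl is already available, as in do_check_repo below):

    from osclib.checkrepo import CheckRepo
    from osclib.cycle import CycleDetector
    from osclib.memoize import memoize, CACHEDIR

    checkrepo = CheckRepo(apiurl)           # preloads .grouped / .groups from the staging projects
    cycle_detector = CycleDetector(apiurl)  # detects cycles a set of requests would introduce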
@@ -5,21 +5,14 @@
# Copy this script to ~/.osc-plugins/ or /var/lib/osc-plugins .
# Then try to run 'osc check_repo --help' to see the usage.

import cPickle
from copy import deepcopy
from datetime import datetime
from functools import wraps
import fcntl
import os
import re
import shelve
import shutil
import subprocess
import tempfile
import sys

from urllib import quote_plus
import urllib2
import sys
from xml.etree import cElementTree as ET

from osc import oscerr
@@ -35,8 +28,10 @@ from osc.core import Request
# Expand sys.path to search modules inside the plugin directory
_plugin_dir = os.path.expanduser('~/.osc-plugins')
sys.path.append(_plugin_dir)
from osclib.graph import Graph
from osclib.stagingapi import StagingAPI
from osclib.checkrepo import CheckRepo
from osclib.cycle import CycleDetector
from osclib.memoize import memoize, CACHEDIR


# Directory where binary packages are downloaded.
DOWNLOADS = os.path.expanduser('~/co/downloads')
@@ -45,170 +40,14 @@ DOWNLOADS = os.path.expanduser('~/co/downloads')
# XXX - Ugly Hack. Because of the way that osc imports plugins, we need
# to declare some functions and objects used in the decorator as global
#
global cPickle
global deepcopy
global datetime
global fcntl
global shelve
global tempfile
global wraps

global Package_

global memoize

global build
global last_build_success
global builddepinfo
global jobhistory

class Package_(object):
    """Simple package container. Used in a graph as a vertex."""

    def __init__(self, pkg=None, src=None, deps=None, subs=None, element=None):
        self.pkg = pkg
        self.src = src
        self.deps = deps
        self.subs = subs
        if element:
            self.load(element)

    def load(self, element):
        """Load a node from an ElementTree package XML element."""
        self.pkg = element.attrib['name']
        self.src = [e.text for e in element.findall('source')]
        assert len(self.src) == 1, 'There is more than one source package in the graph'
        self.src = self.src[0]
        self.deps = set(e.text for e in element.findall('pkgdep'))
        self.subs = set(e.text for e in element.findall('subpkg'))

    def __repr__(self):
        return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg, self.src, self.deps, self.subs)

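As a hedged illustration (the XML snippet is invented, but it only uses the elements that load() reads from _builddepinfo):

    from xml.etree import cElementTree as ET

    element = ET.fromstring(
        '<package name="osc">'
        '<source>osc</source>'
        '<pkgdep>python</pkgdep>'
        '<subpkg>osc</subpkg>'
        '</package>')
    pkg = Package_(element=element)
    print pkg.pkg, pkg.src, sorted(pkg.deps), sorted(pkg.subs)   # osc osc ['python'] ['osc']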
TMPDIR = '/var/cache/repo-checker'  # Where the cache files are stored


def memoize(ttl=None):
    """Decorator function to implement a persistent cache.

    >>> @memoize()
    ... def test_func(a):
    ...     return a

    Internally, the memoized function has a cache:

    >>> cache = [c.cell_contents for c in test_func.func_closure if 'sync' in dir(c.cell_contents)][0]
    >>> 'sync' in dir(cache)
    True

    There is a limit on the size of the cache:

    >>> for k in cache:
    ...     del cache[k]
    >>> len(cache)
    0

    >>> for i in range(4095):
    ...     test_func(i)
    ...     len(cache)
    4095

    >>> test_func(0)
    0

    >>> len(cache)
    4095

    >>> test_func(4095)
    4095

    >>> len(cache)
    3072

    >>> test_func(0)
    0

    >>> len(cache)
    3073

    >>> from datetime import timedelta
    >>> k = [k for k in cache if cPickle.loads(k) == ((0,), {})][0]
    >>> t, v = cache[k]
    >>> t = t - timedelta(days=10)
    >>> cache[k] = (t, v)
    >>> test_func(0)
    0
    >>> t2, v = cache[k]
    >>> t != t2
    True

    """

    # Configuration variables
    SLOTS = 4096       # Number of slots in the cache file
    NCLEAN = 1024      # Number of slots to remove when the limit is reached
    TIMEOUT = 60*60*2  # Time to live for every cache slot (seconds)

    def _memoize(f):
        # Implement a POSIX lock / unlock extension for shelves. Inspired
        # by ActiveState Code recipe #576591
        def _lock(filename):
            lckfile = open(filename + '.lck', 'w')
            fcntl.flock(lckfile.fileno(), fcntl.LOCK_EX)
            return lckfile

        def _unlock(lckfile):
            fcntl.flock(lckfile.fileno(), fcntl.LOCK_UN)
            lckfile.close()

        def _open_cache(cache_name):
            lckfile = _lock(cache_name)
            cache = shelve.open(cache_name, protocol=-1)
            # Store a reference to the lckfile so it is not closed by the gc
            cache.lckfile = lckfile
            return cache

        def _close_cache(cache):
            cache.close()
            _unlock(cache.lckfile)

        def _clean_cache(cache):
            len_cache = len(cache)
            if len_cache >= SLOTS:
                nclean = NCLEAN + len_cache - SLOTS
                keys_to_delete = sorted(cache, key=lambda k: cache[k][0])[:nclean]
                for key in keys_to_delete:
                    del cache[key]

        @wraps(f)
        def _f(*args, **kwargs):
            def total_seconds(td):
                return (td.microseconds + (td.seconds + td.days * 24 * 3600.) * 10**6) / 10**6
            now = datetime.now()
            key = cPickle.dumps((args, kwargs), protocol=-1)
            updated = False
            cache = _open_cache(cache_name)
            if key in cache:
                timestamp, value = cache[key]
                updated = True if total_seconds(now - timestamp) < ttl else False
            if not updated:
                value = f(*args, **kwargs)
                cache[key] = (now, value)
                _clean_cache(cache)
            _close_cache(cache)
            return value

        cache_dir = os.path.expanduser(TMPDIR)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        cache_name = os.path.join(cache_dir, f.__name__)
        return _f

    ttl = ttl if ttl else TIMEOUT
    return _memoize


@memoize()
def build(apiurl, project, repo, arch, package):
    root = None
@@ -236,18 +75,6 @@ def last_build_success(apiurl, src_project, tgt_project, src_package, rev):
    return root


@memoize(ttl=60*60*6)
def builddepinfo(apiurl, project, repository, arch):
    root = None
    try:
        print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
        url = makeurl(apiurl, ['/build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
        root = http_GET(url).read()
    except urllib2.HTTPError, e:
        print('ERROR in URL %s [%s]' % (url, e))
    return root


def get_project_repos(apiurl, src_project, tgt_project, src_package, rev):
    """Read the repositories of the project from _meta."""
    # XXX TODO - Shitty logic here. A better proposal is to refactor
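Since builddepinfo() returns the raw XML body, callers parse it with ElementTree. A minimal hedged usage (apiurl comes from osc; the project / repository / arch values are the defaults used elsewhere in this file):

    root = ET.fromstring(builddepinfo(apiurl, 'openSUSE:Factory', 'standard', 'x86_64'))
    for package in root.findall('package'):
        print package.attrib['name']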
@@ -697,80 +524,6 @@ def _get_buildinfo(self, opts, prj, repo, arch, pkg):
    return [e.attrib['name'] for e in root.findall('bdep')]


def _get_builddepinfo(self, opts, prj, repo, arch, pkg):
    """Get the builddep info for a single package"""
    root = ET.fromstring(builddepinfo(opts.apiurl, prj, repo, arch))
    packages = [Package_(element=e) for e in root.findall('package')]
    package = [p for p in packages if p.pkg == pkg]
    return package[0] if package else None


# Store packages previously ignored. Don't pollute the screen.
global _ignore_packages
_ignore_packages = set()


def _get_builddepinfo_graph(self, opts, project='openSUSE:Factory', repository='standard', arch='x86_64'):
    """Generate the builddepinfo graph for a given architecture."""

    _IGNORE_PREFIX = ('texlive-', 'master-boot-code')

    # Note, by default generate the graph for all Factory. If you only
    # need the base packages you can use:
    #   project = 'Base:System'
    #   repository = 'openSUSE_Factory'

    root = ET.fromstring(builddepinfo(opts.apiurl, project, repository, arch))
    # Reset the subpackages dict here, so every graph gets a
    # different object.
    packages = [Package_(element=e) for e in root.findall('package')]

    # XXX - Ugly Exception. We need to ignore branding packages and
    # packages for which one of the dependencies does not exist. Also
    # ignore preinstall images.
    packages = [p for p in packages if not ('branding' in p.pkg or p.pkg.startswith('preinstallimage-'))]

    graph = Graph()
    graph.add_nodes_from((p.pkg, p) for p in packages)

    subpkgs = {}    # Given a subpackage, recover the source package
    for p in packages:
        # Check for packages that provide the same subpackage
        for subpkg in p.subs:
            if subpkg in subpkgs:
                # print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
                pass
            else:
                subpkgs[subpkg] = p.pkg

    for p in packages:
        # Calculate the missing deps
        deps = [d for d in p.deps if 'branding' not in d]
        missing = [d for d in deps if not d.startswith(_IGNORE_PREFIX) and d not in subpkgs]
        if missing:
            if p.pkg not in _ignore_packages:
                # print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
                _ignore_packages.add(p.pkg)
            continue

        # XXX - Ugly Hack. Subpackages for texlive are not correctly
        # generated. If the dependency starts with the texlive- prefix,
        # assume that the correct source package is texlive.
        graph.add_edges_from((p.pkg, subpkgs[d] if not d.startswith('texlive-') else 'texlive')
                             for d in deps if not d.startswith('master-boot-code'))

    # Store the subpkgs dict in the graph. It will be used later.
    graph.subpkgs = subpkgs
    return graph


def _get_builddepinfo_cycles(self, opts, package='openSUSE:Factory', repository='standard', arch='x86_64'):
    """Generate the builddepinfo cycle list for a given architecture."""
    root = ET.fromstring(builddepinfo(opts.apiurl, package, repository, arch))
    return frozenset(frozenset(e.text for e in cycle.findall('package'))
                     for cycle in root.findall('cycle'))


def _check_repo_group(self, id_, reqs, opts):
    print '\nCheck group', reqs
    if not all(self._check_repo_buildsuccess(r, opts) for r in reqs):
@@ -811,52 +564,10 @@ def _check_repo_group(self, id_, reqs, opts):
        p.updated = True
        toignore.update(i)

    # Detect cycles - We create the full graph from _builddepinfo.
    for arch in ('x86_64',):
        factory_graph = self._get_builddepinfo_graph(opts, arch=arch)
        factory_cycles = factory_graph.cycles()
        # This graph will be updated for every request
        current_graph = deepcopy(factory_graph)

        subpkgs = current_graph.subpkgs

        # Recover all packages at once, ignoring some packages that
        # can't be found in the x86_64 architecture.
        #
        # The first filter is to remove some packages that do not have
        # `goodrepos`. Those packages are usually marked as 'p.updated
        # = True' (meaning that they are declined or there is a new
        # updated review).
        all_packages = [self._get_builddepinfo(opts, p.sproject, p.goodrepos[0], arch, p.spackage)
                        for p in packs if not p.updated]
        all_packages = [pkg for pkg in all_packages if pkg]

        subpkgs.update(dict((p, pkg.pkg) for pkg in all_packages for p in pkg.subs))

        for pkg in all_packages:
            # Update the current graph and see if we have different cycles
            edges_to = ()
            if pkg.pkg in current_graph:
                current_graph[pkg.pkg] = pkg
                current_graph.remove_edges_from(set((pkg.pkg, p) for p in current_graph.edges(pkg.pkg)))
                edges_to = current_graph.edges_to(pkg.pkg)
                current_graph.remove_edges_from(set((p, pkg.pkg) for p in edges_to))
            else:
                current_graph.add_node(pkg.pkg, pkg)
            current_graph.add_edges_from((pkg.pkg, subpkgs[p]) for p in pkg.deps if p in subpkgs)
            current_graph.add_edges_from((p, pkg.pkg) for p in edges_to
                                         if pkg.pkg in set(subpkgs[sp] for sp in current_graph[p].deps if sp in subpkgs))

        for cycle in current_graph.cycles():
            if cycle not in factory_cycles:
                print
                print 'New cycle detected:', sorted(cycle)
                factory_edges = set((u, v) for u in cycle for v in factory_graph.edges(u) if v in cycle)
                current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
                print 'New edges:', sorted(current_edges - factory_edges)
                # Mark all packages as updated, to avoid them being accepted
                for p in reqs:
                    p.updated = True
    # Detect cycles in the current Factory graph after we update the
    # links with the current list of requests.
    cycle_detector = CycleDetector(opts.apiurl)
    cycle_detector.cycles(packs)

    for p in reqs:
        smissing = []
@@ -982,31 +693,20 @@ def do_check_repo(self, subcmd, opts, *args):
    """

    opts.mode = ''

    opts.verbose = False

    opts.apiurl = self.get_api_url()
    api = StagingAPI(opts.apiurl)

    # grouped = { id: staging, }
    opts.grouped = {}
    for prj in api.get_staging_projects():
        meta = api.get_prj_pseudometa(prj)
        for req in meta['requests']:
            opts.grouped[req['id']] = prj
        for req in api.list_requests_in_prj(prj):
            opts.grouped[req] = prj
    checkrepo = CheckRepo(opts.apiurl)

    # groups = { staging: [ids,], }
    opts.groups = {}
    for req, prj in opts.grouped.items():
        group = opts.groups.get(prj, [])
        group.append(req)
        opts.groups[prj] = group
    # XXX TODO - Remove this when all access to opts.group[s|ed] comes
    # from checkrepo.
    opts.grouped = checkrepo.grouped
    opts.groups = checkrepo.groups

    if opts.skip:
        if not len(args):
            raise oscerr.WrongArgs('Please give, if you want to skip a review specify a SRID')
            raise oscerr.WrongArgs('Provide #IDs to skip.')

        for id_ in args:
            msg = 'skip review'
            print 'ACCEPTED', msg
@@ -1040,7 +740,7 @@ def do_check_repo(self, subcmd, opts, *args):
        groups[p.group] = a

    self.repocheckerdir = os.path.dirname(os.path.realpath(os.path.expanduser('~/.osc-plugins/osc-check_repo.py')))
    self.repodir = "%s/repo-%s-%s-x86_64" % (TMPDIR, 'openSUSE:Factory', 'standard')
    self.repodir = "%s/repo-%s-%s-x86_64" % (CACHEDIR, 'openSUSE:Factory', 'standard')
    if not os.path.exists(self.repodir):
        os.mkdir(self.repodir)
    civs = 'LC_ALL=C perl %s/bs_mirrorfull --nodebug https://build.opensuse.org/build/%s/%s/x86_64 %s' % (

osclib/checkrepo.py (new file, 72 lines)
@@ -0,0 +1,72 @@
# Copyright (C) 2014 SUSE Linux Products GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

import urllib2
from xml.etree import cElementTree as ET

from osc.core import http_POST
from osc.core import makeurl

from osclib.stagingapi import StagingAPI


class CheckRepo(object):

    def __init__(self, apiurl):
        """CheckRepo constructor."""
        self.apiurl = apiurl
        self.staging = StagingAPI(apiurl)

        # grouped = { id: staging, }
        self.grouped = {}
        # groups = { staging: [ids,], }
        self.groups = {}
        self._staging()

    def _staging(self):
        """
        Preload the groups of related requests associated with the same
        staging project.
        """
        for project in self.staging.get_staging_projects():
            # Get all the request identifiers for the project
            requests = self.staging.get_prj_pseudometa(project)['requests']
            requests = [req['id'] for req in requests]

            # Note: Originally we also recovered the requests returned by
            # list_requests_in_prj(). I guess that if the staging
            # project is working properly, this method does not add any
            # new request to the list.
            if requests:
                self.groups[project] = requests
                self.grouped.update({req: project for req in requests})

    def change_review_state(self, request_id, newstate, message=''):
        """Based on osc/osc/core.py. Fixed 'by_user'."""
        query = {
            'cmd': 'changereviewstate',
            'newstate': newstate,
            'by_user': 'factory-repo-checker',
        }

        code = 404
        url = makeurl(self.apiurl, ['request', str(request_id)], query=query)
        try:
            root = ET.parse(http_POST(url, data=message)).getroot()
            code = root.attrib['code']
        except urllib2.HTTPError, e:
            print('ERROR in URL %s [%s]' % (url, e))
        return code
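A hedged usage sketch (the request id 221234 is hypothetical; 'skip review' is the message used by the plugin above):

    checkrepo = CheckRepo(apiurl)
    print checkrepo.groups    # { staging_project: [request_ids,] }
    print checkrepo.grouped   # { request_id: staging_project }
    # Returns the status code from OBS, or the default 404 if the POST failed
    code = checkrepo.change_review_state(221234, 'accepted', message='skip review')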

osclib/cycle.py (new file, 279 lines)
@@ -0,0 +1,279 @@
# Copyright (C) 2014 SUSE Linux Products GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from copy import deepcopy
import urllib2
from xml.etree import cElementTree as ET

from osc.core import http_GET
from osc.core import makeurl

from .memoize import memoize


class Graph(dict):
    """Graph object. Inspired by the NetworkX data model."""

    def __init__(self):
        """Initialize an empty graph."""
        # The nodes are stored in the Graph dict itself, but the
        # adjacency list is stored as an attribute.
        self.adj = {}

    def add_node(self, name, value):
        """Add a node to the graph."""
        self[name] = value
        if name not in self.adj:
            self.adj[name] = set()

    def add_nodes_from(self, nodes_and_values):
        """Add multiple nodes."""
        for node, value in nodes_and_values:
            self.add_node(node, value)

    def add_edge(self, u, v, directed=True):
        """Add the edge u -> v, and v -> u if not directed."""
        self.adj[u].add(v)
        if not directed:
            self.adj[v].add(u)

    def add_edges_from(self, edges, directed=True):
        """Add the edges from an iterator."""
        for u, v in edges:
            self.add_edge(u, v, directed)

    def remove_edge(self, u, v, directed=True):
        """Remove the edge u -> v, and v -> u if not directed."""
        try:
            self.adj[u].remove(v)
        except KeyError:
            pass
        if not directed:
            try:
                self.adj[v].remove(u)
            except KeyError:
                pass

    def remove_edges_from(self, edges, directed=True):
        """Remove the edges from an iterator."""
        for u, v in edges:
            self.remove_edge(u, v, directed)

    def edges(self, v):
        """Get the adjacency list for a vertex."""
        return sorted(self.adj[v]) if v in self else ()

    def edges_to(self, v):
        """Get all the vertices that point to v."""
        return sorted(u for u in self.adj if v in self.adj[u])

    def cycles(self):
        """Detect cycles using Tarjan's algorithm."""
        index = [0]
        path = []
        cycles = []

        v_index = {}
        v_lowlink = {}

        def scc(node, v):
            v_index[v], v_lowlink[v] = index[0], index[0]
            index[0] += 1
            path.append(node)

            for succ in self.adj.get(node, []):
                w = self[succ]
                if w not in v_index:
                    scc(succ, w)
                    v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
                elif succ in path:
                    v_lowlink[v] = min(v_lowlink[v], v_index[w])

            if v_index[v] == v_lowlink[v]:
                i = path.index(node)
                path[:], cycle = path[:i], frozenset(path[i:])
                if len(cycle) > 1:
                    cycles.append(cycle)

        for node in sorted(self):
            v = self[node]
            if not getattr(v, 'index', 0):
                scc(node, v)
        return frozenset(cycles)


class Package(object):
    """Simple package container. Used in a graph as a vertex."""

    def __init__(self, pkg=None, src=None, deps=None, subs=None,
                 element=None):
        self.pkg = pkg
        self.src = src
        self.deps = deps
        self.subs = subs
        if element:
            self.load(element)

    def load(self, element):
        """Load a node from an ElementTree package XML element."""
        self.pkg = element.attrib['name']
        self.src = [e.text for e in element.findall('source')]
        assert len(self.src) == 1, 'There is more than one source package in the graph'
        self.src = self.src[0]
        self.deps = set(e.text for e in element.findall('pkgdep'))
        self.subs = set(e.text for e in element.findall('subpkg'))

    def __repr__(self):
        return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg,
                                                          self.src,
                                                          self.deps,
                                                          self.subs)

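A small hedged example of the Graph cycle detection (node names and edges are invented; the values are Package instances, which satisfy the getattr(v, 'index', 0) guard in cycles() because they carry no 'index' attribute):

    graph = Graph()
    graph.add_nodes_from((name, Package(pkg=name)) for name in ('a', 'b', 'c'))
    graph.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'a')])
    print graph.cycles()   # one strongly connected component: frozenset(['a', 'b', 'c'])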
class CycleDetector(object):
    """Class to detect cycles in Factory."""

    def __init__(self, apiurl):
        self.apiurl = apiurl
        # Store packages previously ignored. Don't pollute the screen.
        self._ignore_packages = set()

    @memoize(ttl=60*60*6)
    def _builddepinfo(self, project, repository, arch):
        root = None
        try:
            print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
            url = makeurl(self.apiurl, ['/build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
            root = http_GET(url).read()
        except urllib2.HTTPError, e:
            print('ERROR in URL %s [%s]' % (url, e))
        return root

    def _get_builddepinfo(self, project, repository, arch, package):
        """Get the builddep info for a single package"""
        root = ET.fromstring(self._builddepinfo(project, repository, arch))
        packages = [Package(element=e) for e in root.findall('package')]
        package = [p for p in packages if p.pkg == package]
        return package[0] if package else None

    def _get_builddepinfo_graph(self, project, repository, arch):
        """Generate the builddepinfo graph for a given architecture."""

        _IGNORE_PREFIX = ('texlive-', 'master-boot-code')

        # Note, by default generate the graph for all Factory. If you only
        # need the base packages you can use:
        #   project = 'Base:System'
        #   repository = 'openSUSE_Factory'

        root = ET.fromstring(self._builddepinfo(project, repository, arch))
        # Reset the subpackages dict here, so every graph gets a
        # different object.
        packages = [Package(element=e) for e in root.findall('package')]

        # XXX - Ugly Exception. We need to ignore branding packages and
        # packages for which one of the dependencies does not exist. Also
        # ignore preinstall images.
        packages = [p for p in packages if not ('branding' in p.pkg or p.pkg.startswith('preinstallimage-'))]

        graph = Graph()
        graph.add_nodes_from((p.pkg, p) for p in packages)

        subpkgs = {}    # Given a subpackage, recover the source package
        for p in packages:
            # Check for packages that provide the same subpackage
            for subpkg in p.subs:
                if subpkg in subpkgs:
                    # print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
                    pass
                else:
                    subpkgs[subpkg] = p.pkg

        for p in packages:
            # Calculate the missing deps
            deps = [d for d in p.deps if 'branding' not in d]
            missing = [d for d in deps if not d.startswith(_IGNORE_PREFIX) and d not in subpkgs]
            if missing:
                if p.pkg not in self._ignore_packages:
                    # print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
                    self._ignore_packages.add(p.pkg)
                continue

            # XXX - Ugly Hack. Subpackages for texlive are not correctly
            # generated. If the dependency starts with the texlive- prefix,
            # assume that the correct source package is texlive.
            graph.add_edges_from((p.pkg, subpkgs[d] if not d.startswith('texlive-') else 'texlive')
                                 for d in deps if not d.startswith('master-boot-code'))

        # Store the subpkgs dict in the graph. It will be used later.
        graph.subpkgs = subpkgs
        return graph

    def _get_builddepinfo_cycles(self, package, repository, arch):
        """Generate the builddepinfo cycle list for a given architecture."""
        root = ET.fromstring(self._builddepinfo(package, repository, arch))
        return frozenset(frozenset(e.text for e in cycle.findall('package'))
                         for cycle in root.findall('cycle'))

    def cycles(self, packs, project='openSUSE:Factory', repository='standard', arch='x86_64'):
        """Detect cycles in a specific repository."""

        # Detect cycles - We create the full graph from _builddepinfo.
        factory_graph = self._get_builddepinfo_graph(project, repository, arch)
        factory_cycles = factory_graph.cycles()

        # This graph will be updated for every request
        current_graph = deepcopy(factory_graph)

        subpkgs = current_graph.subpkgs

        # Recover all packages at once, ignoring some packages that
        # can't be found in the x86_64 architecture.
        #
        # The first filter is to remove some packages that do not have
        # `goodrepos`. Those packages are usually marked as 'p.updated
        # = True' (meaning that they are declined or there is a new
        # updated review).
        all_packages = [self._get_builddepinfo(p.sproject, p.goodrepos[0], arch, p.spackage)
                        for p in packs if not p.updated]
        all_packages = [pkg for pkg in all_packages if pkg]

        subpkgs.update(dict((p, pkg.pkg) for pkg in all_packages for p in pkg.subs))

        for pkg in all_packages:
            # Update the current graph and see if we have different cycles
            edges_to = ()
            if pkg.pkg in current_graph:
                current_graph[pkg.pkg] = pkg
                current_graph.remove_edges_from(set((pkg.pkg, p) for p in current_graph.edges(pkg.pkg)))
                edges_to = current_graph.edges_to(pkg.pkg)
                current_graph.remove_edges_from(set((p, pkg.pkg) for p in edges_to))
            else:
                current_graph.add_node(pkg.pkg, pkg)
            current_graph.add_edges_from((pkg.pkg, subpkgs[p]) for p in pkg.deps if p in subpkgs)
            current_graph.add_edges_from((p, pkg.pkg) for p in edges_to
                                         if pkg.pkg in set(subpkgs[sp] for sp in current_graph[p].deps if sp in subpkgs))

        for cycle in current_graph.cycles():
            if cycle not in factory_cycles:
                print
                print 'New cycle detected:', sorted(cycle)
                factory_edges = set((u, v) for u in cycle for v in factory_graph.edges(u) if v in cycle)
                current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
                print 'New edges:', sorted(current_edges - factory_edges)
                # Mark all packages as updated, to avoid them being accepted
                for p in packs:
                    p.updated = True
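In _check_repo_group above, the detector is driven with the plugin's list of request objects; a hedged sketch of the call (each element of packs carries the .sproject, .goodrepos, .spackage and .updated attributes that cycles() reads):

    cycle_detector = CycleDetector(opts.apiurl)
    cycle_detector.cycles(packs)   # prints new cycles and marks the offending requests as updated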

osclib/graph.py (deleted file, 105 lines)
@@ -1,105 +0,0 @@
# Copyright (C) 2014 SUSE Linux Products GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

class Graph(dict):
    """Graph object. Inspired by the NetworkX data model."""

    def __init__(self):
        """Initialize an empty graph."""
        # The nodes are stored in the Graph dict itself, but the
        # adjacency list is stored as an attribute.
        self.adj = {}

    def add_node(self, name, value):
        """Add a node to the graph."""
        self[name] = value
        if name not in self.adj:
            self.adj[name] = set()

    def add_nodes_from(self, nodes_and_values):
        """Add multiple nodes."""
        for node, value in nodes_and_values:
            self.add_node(node, value)

    def add_edge(self, u, v, directed=True):
        """Add the edge u -> v, and v -> u if not directed."""
        self.adj[u].add(v)
        if not directed:
            self.adj[v].add(u)

    def add_edges_from(self, edges, directed=True):
        """Add the edges from an iterator."""
        for u, v in edges:
            self.add_edge(u, v, directed)

    def remove_edge(self, u, v, directed=True):
        """Remove the edge u -> v, and v -> u if not directed."""
        try:
            self.adj[u].remove(v)
        except KeyError:
            pass
        if not directed:
            try:
                self.adj[v].remove(u)
            except KeyError:
                pass

    def remove_edges_from(self, edges, directed=True):
        """Remove the edges from an iterator."""
        for u, v in edges:
            self.remove_edge(u, v, directed)

    def edges(self, v):
        """Get the adjacency list for a vertex."""
        return sorted(self.adj[v]) if v in self else ()

    def edges_to(self, v):
        """Get all the vertices that point to v."""
        return sorted(u for u in self.adj if v in self.adj[u])

    def cycles(self):
        """Detect cycles using Tarjan's algorithm."""
        index = [0]
        path = []
        cycles = []

        v_index = {}
        v_lowlink = {}

        def scc(node, v):
            v_index[v], v_lowlink[v] = index[0], index[0]
            index[0] += 1
            path.append(node)

            for succ in self.adj.get(node, []):
                w = self[succ]
                if w not in v_index:
                    scc(succ, w)
                    v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
                elif succ in path:
                    v_lowlink[v] = min(v_lowlink[v], v_index[w])

            if v_index[v] == v_lowlink[v]:
                i = path.index(node)
                path[:], cycle = path[:i], frozenset(path[i:])
                if len(cycle) > 1:
                    cycles.append(cycle)

        for node in sorted(self):
            v = self[node]
            if not getattr(v, 'index', 0):
                scc(node, v)
        return frozenset(cycles)

osclib/memoize.py (new file, 149 lines)
@@ -0,0 +1,149 @@
# Copyright (C) 2014 SUSE Linux Products GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from datetime import datetime
import fcntl
from functools import wraps
import os
import shelve
try:
    import cPickle as pickle
except ImportError:
    import pickle


# Where the cache files are stored
CACHEDIR = '/var/cache/repo-checker'


def memoize(ttl=None):
    """Decorator function to implement a persistent cache.

    >>> @memoize()
    ... def test_func(a):
    ...     return a

    Internally, the memoized function has a cache:

    >>> cache = [c.cell_contents for c in test_func.func_closure if 'sync' in dir(c.cell_contents)][0]
    >>> 'sync' in dir(cache)
    True

    There is a limit on the size of the cache:

    >>> for k in cache:
    ...     del cache[k]
    >>> len(cache)
    0

    >>> for i in range(4095):
    ...     test_func(i)
    ...     len(cache)
    4095

    >>> test_func(0)
    0

    >>> len(cache)
    4095

    >>> test_func(4095)
    4095

    >>> len(cache)
    3072

    >>> test_func(0)
    0

    >>> len(cache)
    3073

    >>> from datetime import timedelta
    >>> k = [k for k in cache if pickle.loads(k) == ((0,), {})][0]
    >>> t, v = cache[k]
    >>> t = t - timedelta(days=10)
    >>> cache[k] = (t, v)
    >>> test_func(0)
    0
    >>> t2, v = cache[k]
    >>> t != t2
    True

    """

    # Configuration variables
    SLOTS = 4096       # Number of slots in the cache file
    NCLEAN = 1024      # Number of slots to remove when the limit is reached
    TIMEOUT = 60*60*2  # Time to live for every cache slot (seconds)

    def _memoize(fn):
        # Implement a POSIX lock / unlock extension for shelves. Inspired
        # by ActiveState Code recipe #576591
        def _lock(filename):
            lckfile = open(filename + '.lck', 'w')
            fcntl.flock(lckfile.fileno(), fcntl.LOCK_EX)
            return lckfile

        def _unlock(lckfile):
            fcntl.flock(lckfile.fileno(), fcntl.LOCK_UN)
            lckfile.close()

        def _open_cache(cache_name):
            lckfile = _lock(cache_name)
            cache = shelve.open(cache_name, protocol=-1)
            # Store a reference to the lckfile so it is not closed by the gc
            cache.lckfile = lckfile
            return cache

        def _close_cache(cache):
            cache.close()
            _unlock(cache.lckfile)

        def _clean_cache(cache):
            len_cache = len(cache)
            if len_cache >= SLOTS:
                nclean = NCLEAN + len_cache - SLOTS
                keys_to_delete = sorted(cache, key=lambda k: cache[k][0])[:nclean]
                for key in keys_to_delete:
                    del cache[key]

        @wraps(fn)
        def _fn(*args, **kwargs):
            def total_seconds(td):
                return (td.microseconds + (td.seconds + td.days * 24 * 3600.) * 10**6) / 10**6
            now = datetime.now()
            key = pickle.dumps((args, kwargs), protocol=-1)
            updated = False
            cache = _open_cache(cache_name)
            if key in cache:
                timestamp, value = cache[key]
                updated = True if total_seconds(now - timestamp) < ttl else False
            if not updated:
                value = fn(*args, **kwargs)
                cache[key] = (now, value)
                _clean_cache(cache)
            _close_cache(cache)
            return value

        cache_dir = os.path.expanduser(CACHEDIR)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        cache_name = os.path.join(cache_dir, fn.__name__)
        return _fn

    ttl = ttl if ttl else TIMEOUT
    return _memoize
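A hedged usage sketch (the test function is invented; CACHEDIR must be writable, since the decorator creates it on first call):

    from osclib.memoize import memoize

    @memoize(ttl=60*60*6)        # keep results on disk for six hours
    def expensive(n):
        print 'computing', n     # printed only on a cache miss
        return n * n

    print expensive(4)           # computes, stores (timestamp, 16) in the shelve
    print expensive(4)           # served from the cache until the TTL expires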