Jimmy Berry 6069245350 Remove SUSE copyright, warranty, and license headers.
Distinct copyrights were left as I do not wish to track down commit
history to ensure it properly documents the copyright holders. Also left
non-GPLv2 licenses and left bs_copy untouched as a mirror from OBS.

Already have a mix of with and without headers and even OBS does not place
on majority of files. If SUSE lawyers have an issue it will come up in
legal review for Factory.
2018-08-23 19:18:06 -05:00

214 lines
7.7 KiB
Python

import urllib2
from xml.etree import cElementTree as ET
from osc.core import http_GET
from osc.core import makeurl
from .memoize import memoize
class Graph(dict):
"""Graph object. Inspired in NetworkX data model."""
def __init__(self):
"""Initialize an empty graph."""
# The nodes are stored in the Graph dict itself, but the
# adjacent list is stored as an attribute.
self.adj = {}
def add_node(self, name, value):
"""Add a node in the graph."""
self[name] = value
if name not in self.adj:
self.adj[name] = set()
def add_nodes_from(self, nodes_and_values):
"""Add multiple nodes"""
for node, value in nodes_and_values:
self.add_node(node, value)
def add_edge(self, u, v, directed=True):
"""Add the edge u -> v, an v -> u if not directed."""
self.adj[u].add(v)
if not directed:
self.adj[v].add(u)
def add_edges_from(self, edges, directed=True):
"""Add the edges from an iterator."""
for u, v in edges:
self.add_edge(u, v, directed)
def remove_edge(self, u, v, directed=True):
"""Remove the edge u -> v, an v -> u if not directed."""
try:
self.adj[u].remove(v)
except KeyError:
pass
if not directed:
try:
self.adj[v].remove(u)
except KeyError:
pass
def remove_edges_from(self, edges, directed=True):
"""Remove the edges from an iterator."""
for u, v in edges:
self.remove_edge(u, v, directed)
def edges(self, v):
"""Get the adjancent list for a vertex."""
return sorted(self.adj[v]) if v in self else ()
def edges_to(self, v):
"""Get the all the vertex that point to v."""
return sorted(u for u in self.adj if v in self.adj[u])
def cycles(self):
"""Detect cycles using Tarjan algorithm."""
index = [0]
path = []
cycles = []
v_index = {}
v_lowlink = {}
def scc(node, v):
v_index[v], v_lowlink[v] = index[0], index[0]
index[0] += 1
path.append(node)
for succ in self.adj.get(node, []):
w = self[succ]
if w not in v_index:
scc(succ, w)
v_lowlink[v] = min(v_lowlink[v], v_lowlink[w])
elif succ in path:
v_lowlink[v] = min(v_lowlink[v], v_index[w])
if v_index[v] == v_lowlink[v]:
i = path.index(node)
path[:], cycle = path[:i], frozenset(path[i:])
if len(cycle) > 1:
cycles.append(cycle)
for node in sorted(self):
v = self[node]
if not getattr(v, 'index', 0):
scc(node, v)
return frozenset(cycles)
class Package(object):
"""Simple package container. Used in a graph as a vertex."""
def __init__(self, pkg=None, src=None, deps=None, subs=None,
element=None):
self.pkg = pkg
self.src = src
self.deps = deps
self.subs = subs
if element:
self.load(element)
def load(self, element):
"""Load a node from a ElementTree package XML element"""
self.pkg = element.attrib['name']
self.src = [e.text for e in element.findall('source')]
assert len(self.src) == 1, 'There are more that one source packages in the graph'
self.src = self.src[0]
self.deps = set(e.text for e in element.findall('pkgdep'))
self.subs = set(e.text for e in element.findall('subpkg'))
def __repr__(self):
return 'PKG: %s\nSRC: %s\nDEPS: %s\n SUBS: %s' % (self.pkg,
self.src,
self.deps,
self.subs)
class CycleDetector(object):
"""Class to detect cycles in an OBS project."""
def __init__(self, api):
self.api = api
# Store packages prevoiusly ignored. Don't pollute the screen.
self._ignore_packages = set()
def _builddepinfo(self, project, repository, arch):
root = None
try:
# print('Generating _builddepinfo for (%s, %s, %s)' % (project, repository, arch))
url = makeurl(self.api.apiurl, ['build/%s/%s/%s/_builddepinfo' % (project, repository, arch)])
root = http_GET(url).read()
except urllib2.HTTPError as e:
print('ERROR in URL %s [%s]' % (url, e))
return root
def _get_builddepinfo_graph(self, project, repository, arch):
"""Generate the buildepinfo graph for a given architecture."""
# Note, by default generate the graph for all Factory /
# 13/2. If you only need the base packages you can use:
# project = 'Base:System'
# repository = 'openSUSE_Factory'
root = ET.fromstring(self._builddepinfo(project, repository, arch))
# Reset the subpackages dict here, so for every graph is a
# different object.
packages = [Package(element=e) for e in root.findall('package')]
graph = Graph()
graph.add_nodes_from((p.pkg, p) for p in packages)
subpkgs = {} # Given a subpackage, recover the source package
for p in packages:
# Check for packages that provides the same subpackage
for subpkg in p.subs:
if subpkg in subpkgs:
# print 'Subpackage duplication %s - %s (subpkg: %s)' % (p.pkg, subpkgs[subpkg], subpkg)
pass
else:
subpkgs[subpkg] = p.pkg
for p in packages:
# Calculate the missing deps
deps = p.deps
missing = set(deps) - set(subpkgs)
if missing:
if p.pkg not in self._ignore_packages:
# print 'Ignoring package. Missing dependencies %s -> (%s) %s...' % (p.pkg, len(missing), missing[:5])
self._ignore_packages.add(p.pkg)
continue
graph.add_edges_from((p.pkg, subpkgs[d]) for d in deps)
# Store the subpkgs dict in the graph. It will be used later.
graph.subpkgs = subpkgs
return graph
def cycles(self, override_pair, overridden_pair, arch):
"""Detect cycles in a specific repository."""
# Detect cycles - We create the full graph from _builddepinfo.
project_graph = self._get_builddepinfo_graph(overridden_pair[0], overridden_pair[1], arch)
current_graph = self._get_builddepinfo_graph(override_pair[0], override_pair[1], arch)
# Sometimes, new cycles have only new edges, but not new
# packages. We need to inform about this, so this can become
# a warning instead of an error.
#
# To do that, we store in `project_cycles_pkgs` all the
# project (i.e Factory) cycles as a set of packages, so we can
# check if the new cycle (also as a set of packages) is
# included here.
project_cycles = project_graph.cycles()
project_cycles_pkgs = [set(cycle) for cycle in project_cycles]
for cycle in current_graph.cycles():
if cycle not in project_cycles:
project_edges = set((u, v) for u in cycle for v in project_graph.edges(u) if v in cycle)
current_edges = set((u, v) for u in cycle for v in current_graph.edges(u) if v in cycle)
current_pkgs = set(cycle)
yield (cycle,
sorted(current_edges - project_edges),
not any(current_pkgs.issubset(cpkgs) for cpkgs in project_cycles_pkgs))