#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 SUSE Linux GmbH
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

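"""Crawl a project (openSUSE:42 by default) and sort its packages into
subprojects according to where their sources come from.

Sources matching a SUSE:SLE-12* project are linked into
<from_prj>:SLE12-Picks, sources matching openSUSE:Factory into
<from_prj>:Factory-Copies; packages found nowhere are logged as forks.

Typical invocation (the script file name here is assumed):

    ./update_crawler.py --dry somepackage
"""
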
import argparse
import itertools
import logging
import sys
import time
import urllib2

from xml.etree import cElementTree as ET

import osc.conf
import osc.core

from osclib.memoize import memoize

OPENSUSE = 'openSUSE:42'

makeurl = osc.core.makeurl
http_GET = osc.core.http_GET
http_DELETE = osc.core.http_DELETE
http_PUT = osc.core.http_PUT
http_POST = osc.core.http_POST


class UpdateCrawler(object):
    def __init__(self, from_prj, caching=True):
        self.from_prj = from_prj
        self.caching = caching
        self.apiurl = osc.conf.config['apiurl']
        # underlay projects to search for package sources, most preferred first
        self.project_preference_order = [
            'SUSE:SLE-12-SP1:Update',
            'SUSE:SLE-12-SP1:GA',
            'SUSE:SLE-12:Update',
            'SUSE:SLE-12:GA',
            'openSUSE:Factory',
        ]
        self.subprojects = [
            '%s:SLE-Pkgs-With-Overwrites' % self.from_prj,
            '%s:Factory-Copies' % self.from_prj,
            '%s:SLE12-Picks' % self.from_prj,
        ]
        self.projects = [self.from_prj] + self.subprojects

        # map each source project to the subproject its packages get linked into
        self.project_mapping = {}
        for prj in self.project_preference_order:
            if prj.startswith('SUSE:'):
                self.project_mapping[prj] = self.from_prj + ':SLE12-Picks'
            else:
                self.project_mapping[prj] = self.from_prj + ':Factory-Copies'

        self.packages = dict()
        for project in self.projects + self.project_preference_order:
            self.packages[project] = self.get_source_packages(project)
        self.pending_requests = self.get_requests_list(self.from_prj)

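    # HTTP helpers: retried_GET() retries on server errors (5xx) until it
    # succeeds; cached_GET() memoizes responses when caching is enabled.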
    @memoize()
    def _cached_GET(self, url):
        return self.retried_GET(url).read()

    def cached_GET(self, url):
        if self.caching:
            return self._cached_GET(url)
        return self.retried_GET(url).read()

    def retried_GET(self, url):
        try:
            return http_GET(url)
        except urllib2.HTTPError, e:
            if 500 <= e.code <= 599:
                print 'Retrying {}'.format(url)
                time.sleep(1)
                return self.retried_GET(url)
            raise e

    def get_source_packages(self, project, expand=False):
        """Return the list of packages in a project."""
        query = {'expand': 1} if expand else {}
        root = ET.fromstring(
            self.cached_GET(makeurl(self.apiurl,
                                    ['source', project],
                                    query=query)))
        packages = [i.get('name') for i in root.findall('entry')]
        return packages

    def _get_source_package(self, project, package, revision):
        opts = {'view': 'info'}
        if revision:
            opts['rev'] = revision
        return self.cached_GET(makeurl(self.apiurl,
                                       ['source', project, package], opts))

    def get_requests_list(self, project):
        """Return the package names of pending review requests."""
        requests = []
        target = "action[target/@project='{}']".format(project)
        query = "match=({})+and+({})".format("state/@name='new'+or+state/@name='review'", target)
        url = makeurl(self.apiurl, ['search', 'request'], query)
        f = http_GET(url)
        root = ET.parse(f).getroot()
        for rq in root.findall('request'):
            action = rq.find('action')  # take the first action only
            requests.append(action.find('target').get('package'))

        return requests

    def get_request_infos(self, requestid):
        request = self.cached_GET(makeurl(self.apiurl, ['request', requestid]))
        root = ET.fromstring(request)
        action = root.find('.//action')
        source = action.find('source')
        target = action.find('target')
        project = source.get('project')
        package = source.get('package')
        rev = source.get('rev')
        return (project, package, rev, target.get('package'))

    def remove_packages(self, project, packages):
        for package in packages:
            if package not in self.packages[project]:
                continue
            logging.info("deleting %s/%s", project, package)
            url = makeurl(self.apiurl, ['source', project, package])
            try:
                http_DELETE(url)
                self.packages[project].remove(package)
            except urllib2.HTTPError, err:
                if err.code == 404:
                    # a nonexistent package is fine; we are deleting them all anyway
                    pass
                else:
                    # if the package was there but could not be deleted, raise the error
                    raise

    # copied from stagingapi - but the dependencies are too heavy
    def create_package_container(self, project, package):
        """
        Create a package container without any fields in project/package

        :param project: project to create it in
        :param package: package name
        """
        dst_meta = '<package name="{}"><title/><description/></package>'
        dst_meta = dst_meta.format(package)

        url = makeurl(self.apiurl, ['source', project, package, '_meta'])
        logging.debug("create %s/%s", project, package)
        http_PUT(url, data=dst_meta)

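    # build the body of a _link file pinned to an exact source revision
    # (srcmd5), so later checkins to the source package don't leak through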
    def _link_content(self, sourceprj, sourcepkg, rev):
        root = ET.fromstring(self._get_source_package(sourceprj, sourcepkg, rev))
        srcmd5 = root.get('srcmd5')
        vrev = root.get('vrev')
        if vrev is None:
            vrev = ''
        else:
            vrev = " vrev='{}'".format(vrev)
        link = "<link project='{}' package='{}' rev='{}'{}/>"
        return link.format(sourceprj, sourcepkg, srcmd5, vrev)

    def upload_link(self, project, package, link_string):
        url = makeurl(self.apiurl, ['source', project, package, '_link'])
        http_PUT(url, data=link_string)

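    # Move a package: create a frozen link in targetprj pointing at
    # sourceprj/sourcepkg@sourcerev, mirror all of its subpackages as local
    # links, then delete the package (and subpackages) from from_prj.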
    def link_packages(self, sourceprj, sourcepkg, sourcerev, targetprj, targetpkg):
        logging.info("update link %s/%s -> %s/%s@%s", targetprj, targetpkg, sourceprj, sourcepkg, sourcerev)

        # check whether a review request against this package is still pending
        if targetpkg in self.pending_requests:
            logging.info("There is a pending request to %s/%s, skipping!", self.from_prj, targetpkg)
            return

        packages = [targetpkg]

        # make sure 'packages' covers all subpackages
        subpackages, mainpackage = self.get_specfiles(self.from_prj, targetpkg)
        if subpackages:
            if mainpackage and mainpackage != targetpkg:
                raise Exception("{} != {}".format(mainpackage, targetpkg))

            # mainpackage may be None if no spec file links to another project
            packages = list(set([mainpackage or targetpkg]) | set(subpackages))

        # remove targetpkg in all subprojects where it existed before
        for project in self.subprojects:
            if targetpkg not in self.packages[project]:
                continue
            s, m = self.get_specfiles(project, targetpkg)
            pkgs = []
            if s:
                pkgs += s
            if m:
                pkgs += [m]
            self.remove_packages(project, pkgs)

        self.create_package_container(targetprj, targetpkg)
        link = self._link_content(sourceprj, sourcepkg, sourcerev)
        self.upload_link(targetprj, targetpkg, link)

        for package in [p for p in packages if p != targetpkg]:
            # FIXME: link packages from factory that override sle
            # ones to different project
            logging.debug("linking %s -> %s", package, targetpkg)
            link = "<link cicount='copy' package='{}' />".format(targetpkg)
            self.create_package_container(targetprj, package)
            self.upload_link(targetprj, package, link)

        self.remove_packages(self.from_prj, packages)

    def crawl(self, packages=None):
        """Main method of the class that runs the crawler."""

        if packages:
            packages = [p for p in packages if p in self.packages[self.from_prj]]
        else:
            packages = self.packages[self.from_prj]

        self.try_to_find_left_packages(packages)
        self.cleanup_and_freeze_links(self.from_prj, packages)

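    # Search the last few commits of project/package for one whose
    # verifymd5 matches; returns the matching srcmd5, or None.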
    def check_source_in_project(self, project, package, verifymd5):
        if package not in self.packages[project]:
            return None

        try:
            his = self.cached_GET(makeurl(self.apiurl,
                                          ['source', project, package, '_history']))
        except urllib2.HTTPError:
            return None

        his = ET.fromstring(his)
        revs = [rev.find('srcmd5').text for rev in his.findall('revision')]
        revs.reverse()
        for srcmd5 in revs[:5]:  # check the last 5 commits
            root = self.cached_GET(makeurl(self.apiurl,
                                           ['source', project, package],
                                           {'rev': srcmd5, 'view': 'info'}))
            root = ET.fromstring(root)
            if root.get('verifymd5') == verifymd5:
                return srcmd5
        return None

    # check if we can find the srcmd5 in any of our underlay
    # projects
    def try_to_find_left_packages(self, packages):
        for package in packages:
            root = ET.fromstring(self._get_source_package(self.from_prj, package, None))
            linked = root.find('linked')
            if linked is not None and linked.get('package') != package:
                logging.warn("link mismatch: %s <> %s, subpackage?", linked.get('package'), package)
                continue

            logging.debug("check where %s came from", package)
            foundit = False
            for project in self.project_preference_order:
                srcmd5 = self.check_source_in_project(project, package, root.get('verifymd5'))
                if srcmd5:
                    logging.debug('%s -> %s', package, project)
                    self.link_packages(project, package, srcmd5, self.project_mapping[project], package)
                    foundit = True
                    break
            if not foundit:
                logging.debug('%s is a fork', package)

    def get_link(self, project, package):
        try:
            link = self.cached_GET(makeurl(self.apiurl,
                                           ['source', project, package, '_link']))
        except urllib2.HTTPError:
            return None
        return ET.fromstring(link)

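    # Pin a floating link to the current srcmd5 of its target. A rev longer
    # than 5 characters is assumed to already be a srcmd5 (hence the XXX
    # below); project-local links are only validated, not rewritten.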
    def freeze_link(self, project, package):
        link = self.get_link(project, package)
        if link is None:
            return None
        rev = link.get('rev')
        # XXX: magic number?
        if rev and len(rev) > 5:
            return True
        lpackage = link.get('package') or package
        if not link.get('project'):
            if lpackage not in self.packages[project]:
                return False
            return True
        opts = {'view': 'info'}
        if rev:
            opts['rev'] = rev
        root = self.cached_GET(makeurl(self.apiurl,
                                       ['source', link.get('project'), lpackage], opts))
        root = ET.fromstring(root)
        self.link_packages(link.get('project'), lpackage, root.get('srcmd5'), project, package)
        return True

    def cleanup_and_freeze_links(self, prj, packages):
        logging.debug("check for links to freeze in %s", prj)
        for package in packages:
            try:
                # freeze_link() returns None for no link, False for a broken one
                if self.freeze_link(prj, package) is False:
                    logging.error('invalid link %s/%s', prj, package)
                    self.remove_packages(prj, [package])
            except urllib2.HTTPError, e:
                logging.error("failed to freeze {}/{}: {}".format(prj, package, e))

    def check_dups(self):
        """Walk through projects in order of preference and warn about
        duplicates in overlaid projects."""
        mypackages = dict()
        for project in self.subprojects:
            for package in self.packages[project]:
                if package in mypackages:
                    logging.warn("duplicate %s/%s, in %s", project, package, mypackages[package])
                else:
                    mypackages[package] = project

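    # Record all Factory packages that are not yet in any of our projects
    # as frozenlinks in <from_prj>:Factory-Candidates-Check.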
    def freeze_candidates(self):
        url = makeurl(self.apiurl, ['source', 'openSUSE:Factory'], {'view': 'info'})
        root = ET.fromstring(self.cached_GET(url))
        flink = ET.Element('frozenlinks')
        fl = ET.SubElement(flink, 'frozenlink', {'project': 'openSUSE:Factory'})

        for package in root.findall('sourceinfo'):
            if package.get('package').startswith('_product'):
                continue
            exists = any(package.get('package') in self.packages[prj]
                         for prj in self.projects)
            if exists:
                continue
            ET.SubElement(fl, 'package', {'name': package.get('package'),
                                          'srcmd5': package.get('srcmd5'),
                                          'vrev': package.get('vrev')})

        url = makeurl(self.apiurl, ['source', '%s:Factory-Candidates-Check' % self.from_prj,
                                    '_project', '_frozenlinks'], {'meta': '1'})
        try:
            http_PUT(url, data=ET.tostring(flink))
        except urllib2.HTTPError, err:
            logging.error(err)

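    # List the spec files of a package (expanded). For a single spec file
    # returns (None, name); with several, the one linking to another project
    # is considered the main package and the rest subpackages, returned as
    # (subpackages, mainpackage).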
    def get_specfiles(self, project, package):
        url = makeurl(self.apiurl, ['source', project, package], {'expand': '1'})
        root = None
        while root is None:
            try:
                root = ET.fromstring(self.cached_GET(url))
            except urllib2.HTTPError as e:
                print "ERR", e.reason
                # this can happen if the build service didn't yet work on the previous commit
                if e.code == 400 and e.reason == 'service in progress':
                    time.sleep(1)
                    continue
                raise
        files = [entry.get('name').replace('.spec', '')
                 for entry in root.findall('entry') if entry.get('name').endswith('.spec')]
        if len(files) == 1:
            return None, files[0]
        mainpackage = None
        for subpackage in files[:]:
            link = self.get_link(project, subpackage)
            if link is not None:
                if link.get('project') and link.get('project') != project:
                    mainpackage = subpackage
                    files.remove(subpackage)

        logging.info("%s/%s main package: %s, subpackages: [%s]", project, package, mainpackage, ','.join(files))

        return files, mainpackage


def main(args):
    # Configure OSC
    osc.conf.get_config(override_apiurl=args.apiurl)
    osc.conf.config['debug'] = args.debug

    uc = UpdateCrawler(args.from_prj, caching=args.cache_requests)
    uc.check_dups()
    uc.crawl(args.package)

    if not args.no_update_candidates:
        uc.freeze_candidates()


if __name__ == '__main__':
    description = 'sort openSUSE:42 packages into subprojects'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-A', '--apiurl', metavar='URL', help='API URL')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='print info useful for debugging')
    parser.add_argument('-f', '--from', dest='from_prj', metavar='PROJECT',
                        help='project to get the updates from (default: %s)' % OPENSUSE,
                        default=OPENSUSE)
    parser.add_argument('--skip-sanity-checks', action='store_true',
                        help='don\'t do slow check for broken links (only for testing)')
    parser.add_argument('-n', '--dry', action='store_true',
                        help='dry run, no POST, PUT, DELETE')
    parser.add_argument('--cache-requests', action='store_true', default=False,
                        help='cache GET requests. Not recommended for daily use.')
    parser.add_argument('--no-update-candidates', action='store_true',
                        help='don\'t update Factory candidates project')
    parser.add_argument("package", nargs='*', help="package to check")

    args = parser.parse_args()

    # Set logging configuration
    logging.basicConfig(level=logging.DEBUG if args.debug
                        else logging.INFO)

    if args.dry:
        # replace the module-level HTTP helpers with stubs that only log
        def dryrun(t):
            return lambda *args, **kwargs: logging.debug("dryrun %s %s %s", t, args, str(kwargs)[:30])

        http_POST = dryrun('POST')
        http_PUT = dryrun('PUT')
        http_DELETE = dryrun('DELETE')

    sys.exit(main(args))

# vim:sw=4 et