Make update crawler accept list of packages

Also change the info requesting strategy: get the list of packages first,
then query them in chunks. That allows using the unexpanded view and
querying fewer packages in update projects.
Ludwig Nussel 2016-05-02 10:13:59 +02:00
parent 521b61b362
commit e2c9620a7b
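
The batching idea from the commit message, reduced to a standalone sketch: list the project's packages once (unexpanded), then ask for view=info in batches instead of expanding the whole project in a single request. This is illustrative only; fetch(url) is a hypothetical stand-in for osc's HTTP helpers, and the chunk size of 50 mirrors the one used in the patch below.

    import xml.etree.ElementTree as ET
    from urllib import quote_plus

    def chunks(l, n):
        # yield successive n-sized chunks from l
        for i in xrange(0, len(l), n):
            yield l[i:i+n]

    def batched_source_infos(apiurl, project, packages, fetch, chunk_size=50):
        # Query /source/<project>?view=info only for the given packages,
        # in batches, rather than for the expanded project as a whole.
        # `fetch` is a hypothetical callable returning the response body.
        infos = {}
        for batch in chunks(sorted(packages), chunk_size):
            query = ['view=info'] + ['package=%s' % quote_plus(p) for p in batch]
            url = '%s/source/%s?%s' % (apiurl, project, '&'.join(query))
            root = ET.fromstring(fetch(url))
            for si in root.findall('sourceinfo'):
                if si.findall('error'):
                    continue
                infos[si.get('package')] = si
        return infos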


@@ -33,6 +33,7 @@
 import osc.conf
 import osc.core
 import rpm
 import yaml
+from urllib import quote_plus

 from osclib.memoize import memoize
@@ -43,6 +44,12 @@ SLE = 'SUSE:SLE-12-SP1:Update'
 makeurl = osc.core.makeurl
 http_GET = osc.core.http_GET

+# http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python
+def chunks(l, n):
+    """ Yield successive n-sized chunks from l.
+    """
+    for i in xrange(0, len(l), n):
+        yield l[i:i+n]
+
 class UpdateCrawler(object):
     def __init__(self, from_prj, to_prj):
@@ -50,8 +57,32 @@ class UpdateCrawler(object):
         self.to_prj = to_prj
         self.apiurl = osc.conf.config['apiurl']
         self.debug = osc.conf.config['debug']
-        self.parse_lookup()
         self.filter_lookup = set()
+        self.caching = False
+        self.dryrun = False
+        self.parse_lookup()
+
+    # FIXME: duplicated from manager_42
+    def latest_packages(self):
+        data = self.cached_GET(makeurl(self.apiurl,
+                               ['project', 'latest_commits', self.from_prj]))
+        lc = ET.fromstring(data)
+        packages = set()
+        for entry in lc.findall('{http://www.w3.org/2005/Atom}entry'):
+            title = entry.find('{http://www.w3.org/2005/Atom}title').text
+            if title.startswith('In '):
+                packages.add(title[3:].split(' ')[0])
+        return sorted(packages)
+
+    @memoize()
+    def _cached_GET(self, url):
+        return self.retried_GET(url).read()
+
+    def cached_GET(self, url):
+        if self.caching:
+            return self._cached_GET(url)
+        return self.retried_GET(url).read()

     def retried_GET(self, url):
         try:
@@ -63,22 +94,38 @@ class UpdateCrawler(object):
                 return self.retried_GET(url)
             raise e

-    def _get_source_infos(self, project):
-        return self.retried_GET(makeurl(self.apiurl,
-                                ['source', project],
-                                {
-                                    'view': 'info'
-                                })).read()
-
-    def get_source_infos(self, project):
-        root = ET.fromstring(self._get_source_infos(project))
+    def _meta_get_packagelist(self, prj, deleted=None, expand=False):
+
+        query = {}
+        if deleted:
+            query['deleted'] = 1
+        if expand:
+            query['expand'] = 1
+
+        u = osc.core.makeurl(self.apiurl, ['source', prj], query)
+        return self.cached_GET(u)
+
+    def meta_get_packagelist(self, prj, deleted=None, expand=False):
+        root = ET.fromstring(self._meta_get_packagelist(prj, deleted, expand))
+        return [ node.get('name') for node in root.findall('entry') if not node.get('name') == '_product' and not node.get('name').startswith('_product:') and not node.get('name').startswith('patchinfo.') ]
+
+    def _get_source_infos(self, project, packages):
+        query = [ 'view=info' ]
+        if packages:
+            query += [ 'package=%s'%quote_plus(p) for p in packages ]
+        return self.cached_GET(makeurl(self.apiurl,
+                                ['source', project],
+                                query))
+
+    def get_source_infos(self, project, packages):
         ret = dict()
-        for package in root.findall('sourceinfo'):
-            # skip packages that come via project link
-            # FIXME: OBS needs to implement expand=0 for view=info
-            if not package.find('originproject') is None:
-                continue
-            ret[package.get('package')] = package
+        for pkg_chunks in chunks(sorted(packages), 50):
+            root = ET.fromstring(self._get_source_infos(project, pkg_chunks))
+            for package in root.findall('sourceinfo'):
+                if package.findall('error'):
+                    continue
+                ret[package.get('package')] = package
         return ret

     def _submitrequest(self, src_project, src_package, rev, dst_project,
@@ -96,20 +143,19 @@ class UpdateCrawler(object):
         for r in reqs:
             for a in r.actions:
                 if a.to_xml().find('source').get('rev') == rev:
-                    logging.debug('found existing request {}'.format(r.req_id))
+                    logging.debug('{}: found existing request {}'.format(dst_package, r.reqid))
                     foundrev = True
         res = 0
         if not foundrev:
             print "creating submit request", src_project, src_package, rev, dst_project, dst_package
-            # XXX
-            return 0
-            res = osc.core.create_submit_request(self.apiurl,
-                                                 src_project,
-                                                 src_package,
-                                                 dst_project,
-                                                 dst_package,
-                                                 orev=rev,
-                                                 message=msg)
+            if not self.dryrun:
+                res = osc.core.create_submit_request(self.apiurl,
+                                                     src_project,
+                                                     src_package,
+                                                     dst_project,
+                                                     dst_package,
+                                                     orev=rev,
+                                                     message=msg)
         return res

     def submitrequest(self, src_project, src_package, rev, dst_package):
@@ -134,10 +180,10 @@ class UpdateCrawler(object):
             raise

     def parse_lookup(self):
-        self.lookup = yaml.load(self._load_lookup_file())
+        self.lookup = yaml.safe_load(self._load_lookup_file())

     def _load_lookup_file(self):
-        return http_GET(makeurl(self.apiurl,
+        return self.cached_GET(makeurl(self.apiurl,
                         ['source', self.to_prj, '00Meta', 'lookup.yml']))

     def follow_link(self, project, package, rev, verifymd5):
@@ -165,6 +211,9 @@ class UpdateCrawler(object):
     def update_targets(self, targets, sources):
         for package, sourceinfo in sources.items():

+            if package.startswith('patchinfo.'):
+                continue
+
             if self.filter_lookup and not self.lookup.get(package, '') in self.filter_lookup:
                 continue
@@ -206,10 +255,10 @@ class UpdateCrawler(object):
                 logging.error('Error creating the request for %s' % package)

-    def crawl(self):
+    def crawl(self, packages):
         """Main method of the class that run the crawler."""
-        targets = self.get_source_infos(self.to_prj)
-        sources = self.get_source_infos(self.from_prj)
+        targets = self.get_source_infos(self.to_prj, packages)
+        sources = self.get_source_infos(self.from_prj, packages)
         self.update_targets(targets, sources)
@@ -219,10 +268,18 @@ def main(args):
     osc.conf.config['debug'] = args.osc_debug

     uc = UpdateCrawler(args.from_prj, args.to_prj)
+    uc.caching = args.cache_requests
+    uc.dryrun = args.dry

     if args.only_from:
         uc.filter_lookup.add(args.only_from)

-    uc.crawl()
+    given_packages = args.packages
+    if not given_packages:
+        if args.all:
+            given_packages = uc.meta_get_packagelist(args.from_prj)
+        else:
+            given_packages = uc.latest_packages()
+    uc.crawl(given_packages)

 if __name__ == '__main__':
     description = 'Create update SRs for Leap.'
@@ -230,6 +287,8 @@ if __name__ == '__main__':
     parser.add_argument('-A', '--apiurl', metavar='URL', help='API URL')
     parser.add_argument('-d', '--debug', action='store_true',
                         help='print info useful for debuging')
+    parser.add_argument('-a', '--all', action='store_true',
+                        help='check all packages')
     parser.add_argument('-n', '--dry', action='store_true',
                         help='dry run, no POST, PUT, DELETE')
     parser.add_argument('-f', '--from', dest='from_prj', metavar='PROJECT',
@@ -241,6 +300,9 @@ if __name__ == '__main__':
     parser.add_argument('--only-from', dest='only_from', metavar='PROJECT',
                         help='only submit packages that came from PROJECT')
     parser.add_argument("--osc-debug", action="store_true", help="osc debug output")
+    parser.add_argument('--cache-requests', action='store_true', default=False,
+                        help='cache GET requests. Not recommended for daily use.')
+    parser.add_argument("packages", nargs='*', help="packages to check")

     args = parser.parse_args()
@@ -257,3 +319,5 @@ if __name__ == '__main__':
         http_DELETE = dryrun('DELETE')

     sys.exit(main(args))
+
+# vim: sw=4 et