openSUSE-release-tools/manager_42.py

406 lines
16 KiB
Python
Raw Normal View History

2015-08-04 08:01:32 +02:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2015-2017 SUSE Linux GmbH
2015-08-04 08:01:32 +02:00
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import argparse
import itertools
import logging
import sys
from xml.etree import cElementTree as ET
import osc.conf
import osc.core
import urllib2
import time
import yaml
from collections import namedtuple
2015-08-04 08:01:32 +02:00
from osclib.memoize import memoize
2016-05-09 17:07:17 +02:00
logger = logging.getLogger()

# Short aliases for the osc HTTP helpers used throughout this module.
# NOTE: the dry-run mode at the bottom of the file rebinds the mutating
# ones (POST/PUT/DELETE) to logging no-ops.
makeurl = osc.core.makeurl
http_GET = osc.core.http_GET
http_DELETE = osc.core.http_DELETE
http_PUT = osc.core.http_PUT
http_POST = osc.core.http_POST
2016-05-09 13:18:20 +02:00
class Manager42(object):
    """Maintain the 00Meta/lookup.yml origin map of a Leap project.

    For every package in ``from_prj`` the bot records which underlay
    project (Factory, SLE, devel project, ...) the sources came from,
    or marks it as a FORK / subpackage.
    """

    # Defaults for the bot configuration; every key may be overridden by
    # the YAML file handed to __init__ via ``configfh``.
    config_defaults = {
        # packages that are OBS meta/product containers, never real sources
        'ignored_packages' : [
            '00Meta',
            '00aggregates',
            '000product',
            '000package-groups',
            '000release-packages',
        ],
        # underlay projects searched (in order) for a package's origin
        'project_preference_order' : [],
        # origins whose vanished packages are dropped instead of FORKed
        'drop_if_vanished_from' : [],
        # project whose 00Meta/lookup.yml this bot maintains
        'from_prj' : 'openSUSE:Leap:42.3',
        'factory' : 'openSUSE:Factory',
    }
    def __init__(self, caching = True, configfh = None):
        # caching: when True, GET responses go through the memoized
        #   _cached_GET (persisted by osclib.memoize).
        # configfh: optional open file with YAML overrides for
        #   config_defaults.
        self.caching = caching

        self.apiurl = osc.conf.config['apiurl']
        self.config = self._load_config(configfh)
        self.force = False
        # NOTE: the calls below hit the OBS API already at construction
        # time; order matters (config must be loaded first).
        self.parse_lookup(self.config.from_prj)
        self.fill_package_meta()

        # per-project package name lists, extended lazily later by
        # _fill_package_list() for any additional project queried
        self.packages = dict()
        for project in [self.config.from_prj] + self.config.project_preference_order:
            self._fill_package_list(project)
# FIXME: add to ToolBase and rebase Manager42 on that
def _load_config(self, handle = None):
2017-01-13 12:54:22 +01:00
d = self.__class__.config_defaults
y = yaml.safe_load(handle) if handle is not None else {}
return namedtuple('BotConfig', sorted(d.keys()))(*[ y.get(p, d[p]) for p in sorted(d.keys()) ])
def latest_packages(self):
data = self.cached_GET(makeurl(self.apiurl,
['project', 'latest_commits',
self.config.from_prj]))
lc = ET.fromstring(data)
packages = set()
for entry in lc.findall('{http://www.w3.org/2005/Atom}entry'):
title = entry.find('{http://www.w3.org/2005/Atom}title').text
if title.startswith('In '):
packages.add(title[3:].split(' ')[0])
return sorted(packages)
def all_packages(self):
return self.packages[self.config.from_prj]
def parse_lookup(self, project):
self.lookup_changes = 0
self.lookup = {}
try:
self.lookup = yaml.safe_load(self._load_lookup_file(project))
except urllib2.HTTPError as e:
if e.code != 404:
raise
def _load_lookup_file(self, prj):
return self.cached_GET(makeurl(self.apiurl,
['source', prj, '00Meta', 'lookup.yml']))
def _put_lookup_file(self, prj, data):
return http_PUT(makeurl(self.apiurl,
['source', prj, '00Meta', 'lookup.yml']), data=data)
def store_lookup(self):
2016-04-08 16:52:40 +02:00
if self.lookup_changes == 0:
2016-05-09 17:07:17 +02:00
logger.info('no change to lookup.yml')
2016-04-08 16:52:40 +02:00
return
data = yaml.dump(self.lookup, default_flow_style=False, explicit_start=True)
self._put_lookup_file(self.config.from_prj, data)
self.lookup_changes = 0
2015-08-17 13:36:31 +02:00
    # Disk-memoized GET (osclib.memoize); only safe for responses that
    # may legitimately go stale between runs.
    @memoize()
    def _cached_GET(self, url):
        # type: (str) -> str
        return self.retried_GET(url).read()
def cached_GET(self, url):
if self.caching:
2015-08-19 13:40:55 +02:00
return self._cached_GET(url)
return self.retried_GET(url).read()
def retried_GET(self, url):
try:
return http_GET(url)
except urllib2.HTTPError as e:
2015-08-30 17:00:21 +02:00
if 500 <= e.code <= 599:
2016-05-09 17:07:17 +02:00
logger.warn('Retrying {}'.format(url))
time.sleep(1)
return self.retried_GET(url)
raise e
2015-08-17 13:36:31 +02:00
2015-08-04 08:01:32 +02:00
def get_source_packages(self, project, expand=False):
"""Return the list of packages in a project."""
query = {'expand': 1} if expand else {}
try:
root = ET.fromstring(
self.cached_GET(makeurl(self.apiurl,
['source', project],
query=query)))
packages = [i.get('name') for i in root.findall('entry')]
except urllib2.HTTPError as e:
if e.code == 404:
logger.error("{}: {}".format(project, e))
packages = []
else:
raise
2015-08-04 08:01:32 +02:00
return packages
def _get_source_package(self, project, package, revision):
opts = { 'view': 'info' }
if revision:
opts['rev'] = revision
2015-08-17 13:36:31 +02:00
return self.cached_GET(makeurl(self.apiurl,
['source', project, package], opts))
2015-08-13 11:09:29 +02:00
def crawl(self, packages):
"""Main method of the class that runs the crawler."""
2015-08-04 08:01:32 +02:00
for package in sorted(packages):
try:
self.check_one_package(package)
except urllib2.HTTPError as e:
2016-05-09 17:07:17 +02:00
logger.error("Failed to check {}: {}".format(package, e))
pass
# avoid loosing too much work
if self.lookup_changes > 50:
self.store_lookup()
if self.lookup_changes:
self.store_lookup()
2016-04-11 14:00:04 +02:00
def get_inconsistent(self):
known = set(self.lookup.keys())
stale = known - set(self.packages[self.config.from_prj])
unknown = set(self.packages[self.config.from_prj]) - known
if (stale):
logger.info("stale packages: %s", ', '.join(stale))
if (unknown):
logger.info("unknown packages: %s", ', '.join(unknown))
return (stale|unknown)
2016-04-11 14:00:04 +02:00
def get_package_history(self, project, package, deleted = False):
try:
query = {}
if deleted:
query['deleted'] = 1
return self.cached_GET(makeurl(self.apiurl,
['source', project, package, '_history'], query))
except urllib2.HTTPError as e:
2016-04-11 14:00:04 +02:00
if e.code == 404:
return None
raise
2018-01-02 09:54:14 +01:00
def _is_ignored(self, project, package):
if package in self.config.ignored_packages:
logger.debug("%s in ignore list", package)
return True
return False
2017-12-22 11:24:56 +01:00
def _fill_package_list(self, project):
if project not in self.packages:
2018-01-02 09:54:14 +01:00
self.packages[project] = [ p for p in self.get_source_packages(project) if not self._is_ignored(project, p) ]
2017-12-22 11:24:56 +01:00
    def check_source_in_project(self, project, package, verifymd5, deleted=False):
        """Search *project* for a revision of *package* matching *verifymd5*.

        Returns a (srcmd5, rev) tuple of the matching commit, or
        (None, None) when the package is absent or none of its recent
        revisions match.
        """
        self._fill_package_list(project)

        # a live (non-deleted) package must appear in the project listing
        if not deleted and not package in self.packages[project]:
            return None, None

        his = self.get_package_history(project, package, deleted)
        if his is None:
            return None, None

        his = ET.fromstring(his)
        # srcmd5 -> commit revision number, in original commit order
        historyrevs = dict()
        revs = list()
        for rev in his.findall('revision'):
            historyrevs[rev.find('srcmd5').text] = rev.get('rev')
            revs.append(rev.find('srcmd5').text)
        # newest first; only the 5 most recent commits are compared
        revs.reverse()
        for i in range(min(len(revs), 5)): # check last commits
            srcmd5=revs.pop(0)
            root = self.cached_GET(makeurl(self.apiurl,
                ['source', project, package], { 'rev': srcmd5, 'view': 'info'}))
            root = ET.fromstring(root)
            # verifymd5 covers expanded sources, so it is stable across links
            if root.get('verifymd5') == verifymd5:
                return srcmd5, historyrevs[srcmd5]
        return None, None
    # check if we can find the srcmd5 in any of our underlay
    # projects
    def check_one_package(self, package):
        """Determine (and record) the origin of one package in from_prj.

        Updates self.lookup / self.lookup_changes in place.  Possible
        lookup values: a project name, 'Devel;<prj>;<pkg>',
        'subpackage of <pkg>', or 'FORK'.
        """
        lproject = self.lookup.get(package, None)
        # package vanished from the project -> drop its lookup entry
        if not package in self.packages[self.config.from_prj]:
            logger.info("{} vanished".format(package))
            if self.lookup.get(package):
                del self.lookup[package]
                self.lookup_changes += 1
            return

        root = ET.fromstring(self._get_source_package(self.config.from_prj, package, None))
        # local links to a different package are tracked as subpackages
        linked = root.find('linked')
        if not linked is None and linked.get('package') != package:
            lstring = 'subpackage of {}'.format(linked.get('package'))
            if lstring != lproject:
                logger.warn("{} links to {} (was {})".format(package, linked.get('package'), lproject))
                self.lookup[package] = lstring
                self.lookup_changes += 1
            else:
                logger.debug("{} correctly marked as subpackage of {}".format(package, linked.get('package')))
            return

        pm = self.package_metas[package]
        devel = pm.find('devel')
        # case 1: a devel project is set (in meta or previous lookup) --
        # verify the sources actually come from there
        if devel is not None or (lproject is not None and lproject.startswith('Devel;')):
            develprj = None
            develpkg = None
            if devel is None:
                # fall back to the devel location recorded in the lookup
                (dummy, develprj, develpkg) = lproject.split(';')
                logger.warn('{} lacks devel project setting {}/{}'.format(package, develprj, develpkg))
            else:
                develprj = devel.get('project')
                develpkg = devel.get('package')
            srcmd5, rev = self.check_source_in_project(develprj, develpkg,
                                                       root.get('verifymd5'))
            if srcmd5:
                lstring = 'Devel;{};{}'.format(develprj, develpkg)
                if not package in self.lookup or lstring != self.lookup[package]:
                    logger.debug("{} from devel {}/{} (was {})".format(package, develprj, develpkg, lproject))
                    self.lookup[package] = lstring
                    self.lookup_changes += 1
                else:
                    logger.debug("{} lookup from {}/{} is correct".format(package, develprj, develpkg))
                return
        # case 2: verify an existing concrete-project lookup entry
        elif lproject and lproject != 'FORK' and not lproject.startswith('subpackage '):
            srcmd5, rev = self.check_source_in_project(lproject, package, root.get('verifymd5'))
            if srcmd5:
                logger.debug("{} lookup from {} is correct".format(package, lproject))
                # if it's from Factory we check if the package can be found elsewhere meanwhile
                if not self.force and lproject != self.config.factory:
                    return
            elif lproject == self.config.factory and not package in self.packages[lproject]:
                his = self.get_package_history(lproject, package, deleted=True)
                if his:
                    logger.debug("{} got dropped from {}".format(package, lproject))

        # case 3: search all preferred underlay projects for the sources
        logger.debug("check where %s came from", package)
        foundit = False
        for project in self.config.project_preference_order:
            srcmd5, rev = self.check_source_in_project(project, package, root.get('verifymd5'))
            if srcmd5:
                if project != lproject:
                    # NOTE(review): SLE-workarounds matches are logged but the
                    # lookup keeps pointing at the original project -- confirm
                    if project.endswith(':SLE-workarounds'):
                        logger.info('{} is from {} but should come from {}'.format(package, project, lproject))
                    else:
                        logger.info('{} -> {} (was {})'.format(package, project, lproject))
                        self.lookup[package] = project
                        self.lookup_changes += 1
                else:
                    logger.debug('{} still coming from {}'.format(package, project))
                foundit = True
                break

        # nothing matched anywhere -> the package is a local fork
        if not foundit:
            if lproject == 'FORK':
                logger.debug("{}: lookup is correctly marked as fork".format(package))
            elif lproject in self.config.drop_if_vanished_from:
                logger.info('{} dropped from {}'.format(package, lproject))
            else:
                logger.info('{} is a fork (was {})'.format(package, lproject))
                self.lookup[package] = 'FORK'
                self.lookup_changes += 1
def get_link(self, project, package):
try:
2015-08-17 13:36:31 +02:00
link = self.cached_GET(makeurl(self.apiurl,
['source', project, package, '_link']))
2015-08-04 22:26:45 +02:00
except urllib2.HTTPError:
return None
return ET.fromstring(link)
2015-08-13 11:09:29 +02:00
def fill_package_meta(self):
self.package_metas = dict()
url = makeurl(self.apiurl, ['search', 'package'],
"match=[@project='%s']" % self.config.from_prj)
2016-04-08 16:52:40 +02:00
root = ET.fromstring(self.cached_GET(url))
for p in root.findall('package'):
name = p.attrib['name']
self.package_metas[name] = p
2015-08-19 17:29:09 +02:00
2015-08-13 11:09:29 +02:00
2015-08-04 08:01:32 +02:00
def main(args):
    """Entry point: configure osc, build the crawler and run it.

    args: argparse.Namespace from the parser defined in __main__.
    """
    # Configure OSC
    osc.conf.get_config(override_apiurl=args.apiurl)
    osc.conf.config['debug'] = args.debug

    uc = Manager42(caching = args.cache_requests, configfh = args.config )

    # package selection: explicit CLI packages > --all > latest-commits feed
    given_packages = set(args.packages)
    if args.all:
        given_packages = set(uc.all_packages())
    elif not given_packages:
        given_packages = set(uc.latest_packages())

    if args.check_inconsistent:
        given_packages |= uc.get_inconsistent()

    if args.force:
        uc.force = True

    uc.crawl(given_packages)
if __name__ == '__main__':
    description = 'maintain 00Meta/lookup.yml'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-A', '--apiurl', metavar='URL', help='API URL')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='print info useful for debugging')
    parser.add_argument('-a', '--all', action='store_true',
                        help='check all packages')
    parser.add_argument('-c', '--config', dest='config', metavar='FILE',
                        type=argparse.FileType('r'), required = True,
                        help='read config file FILE')
    parser.add_argument('-n', '--dry', action='store_true',
                        help='dry run, no POST, PUT, DELETE')
    parser.add_argument('--force', action='store_true',
                        help='don\'t take previous lookup information into consideration')
    parser.add_argument('--cache-requests', action='store_true', default=False,
                        help='cache GET requests. Not recommended for daily use.')
    parser.add_argument('--check-inconsistent', action='store_true', default=False,
                        help='also check inconsistent lookup entries')
    parser.add_argument("packages", nargs='*', help="packages to check")

    args = parser.parse_args()

    # Set logging configuration
    logging.basicConfig(level=logging.DEBUG if args.debug
                        else logging.INFO,
                        format='%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s')

    if args.dry:
        # rebind the mutating HTTP helpers (module globals) to no-ops that
        # only log what would have been sent
        def dryrun(t, *args, **kwargs):
            return lambda *args, **kwargs: logger.debug("dryrun %s %s %s", t, args, str(kwargs)[:200])

        http_POST = dryrun('POST')
        http_PUT = dryrun('PUT')
        http_DELETE = dryrun('DELETE')

    sys.exit(main(args))