From 6524bb53c3a07998215e90177de97315d6d6064b Mon Sep 17 00:00:00 2001
From: Max Lin
Date: Mon, 17 Aug 2015 18:16:30 +0800
Subject: [PATCH 1/6] Re-enable detach linked entry code

We still need it to handle requests accepted from the corresponding place,
i.e. from self.project_preference_order. But now we detach the linked entry
and pass the requestid along with the copy cmd, so that the subsequent
get_latest_request() in the crawl will not fail.
---
 manager_42.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/manager_42.py b/manager_42.py
index 6c63c037..0858300b 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -298,13 +298,14 @@ class UpdateCrawler(object):
         for project in self.projects:
             for package in self.packages[project]:
                 if package in mypackages:
-                    # XXX: why was this code here?
-#                     # TODO: detach only if actually a link to the deleted package
-#                     url = makeurl(self.apiurl, ['source', 'openSUSE:42', package], { 'opackage': package, 'oproject': 'openSUSE:42', 'cmd': 'copy', 'expand': '1'} )
-#                     try:
-#                         http_POST(url)
-#                     except urllib2.HTTPError, err:
-#                         pass
+                    # TODO: detach only if actually a link to the deleted package
+                    requestid = self.get_latest_request(self.from_prj, package)
+                    if not requestid is None:
+                        url = makeurl(self.apiurl, ['source', self.from_prj, package], { 'opackage': package, 'oproject': self.from_prj, 'cmd': 'copy', 'requestid': requestid, 'expand': '1'})
+                        try:
+                            http_POST(url)
+                        except urllib2.HTTPError, err:
+                            pass
                     self.remove_packages(project, [package])
                 else:
                     mypackages[package] = project
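
For context: the detach that this patch re-enables is simply an OBS source "copy" of the package onto itself, carrying the request number that get_latest_request() reads from the package's _history, so the commit created by the copy stays tied to a request. A minimal standalone sketch of that call, built only from osc functions the script already uses (the project, package and request id below are placeholders, not values from the patch):

    # Illustrative sketch only; PROJECT, PACKAGE and REQUESTID are placeholders.
    import urllib2

    import osc.conf
    import osc.core

    osc.conf.get_config()
    apiurl = osc.conf.config['apiurl']

    PROJECT = 'openSUSE:42'   # placeholder project
    PACKAGE = 'somepkg'       # placeholder package
    REQUESTID = '12345'       # placeholder; the script takes it from _history

    # Copy the package onto itself, recording the request id in the new commit.
    url = osc.core.makeurl(apiurl, ['source', PROJECT, PACKAGE],
                           {'opackage': PACKAGE, 'oproject': PROJECT,
                            'cmd': 'copy', 'requestid': REQUESTID, 'expand': '1'})
    try:
        osc.core.http_POST(url)
    except urllib2.HTTPError:
        pass  # the patch likewise ignores HTTP errors at this point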

From 31a8263a4fedbc1e14320d3a754a8e22a881aeb6 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel
Date: Mon, 17 Aug 2015 13:36:31 +0200
Subject: [PATCH 2/6] cache all or nothing

---
 manager_42.py | 74 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 26 deletions(-)

diff --git a/manager_42.py b/manager_42.py
index 0858300b..f321aa18 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -46,10 +46,10 @@ http_POST = osc.core.http_POST
 # as well. See build-service/src/api/app/models/package.rb -> find_linking_packages()
 
 class UpdateCrawler(object):
-    def __init__(self, from_prj):
+    def __init__(self, from_prj, caching = True):
         self.from_prj = from_prj
+        self.caching = caching
         self.apiurl = osc.conf.config['apiurl']
-        self.debug = osc.conf.config['debug']
         self.project_preference_order = [
             'SUSE:SLE-12-SP1:Update',
             'SUSE:SLE-12-SP1:GA',
@@ -72,30 +72,39 @@ class UpdateCrawler(object):
             self.project_mapping[prj] = self.from_prj + ':Factory-Copies'
 
         self.packages = dict()
-        for project in self.projects:
+        for project in self.projects + self.project_preference_order:
            self.packages[project] = self.get_source_packages(project)
 
+    @memoize()
+    def _cached_GET(self, url):
+        return http_GET(url).read()
+
+    def cached_GET(self, url):
+        if self.caching:
+            return _cached_GET(url)
+        return http_GET(url).read()
+
     def get_source_packages(self, project, expand=False):
         """Return the list of packages in a project."""
         query = {'expand': 1} if expand else {}
-        root = ET.parse(
-            http_GET(makeurl(self.apiurl,
+        root = ET.fromstring(
+            self.cached_GET(makeurl(self.apiurl,
                              ['source', project],
-                             query=query))).getroot()
+                             query=query)))
         packages = [i.get('name') for i in root.findall('entry')]
         return packages
 
-    @memoize()
     def _get_source_package(self, project, package, revision):
         opts = { 'view': 'info' }
         if revision:
             opts['rev'] = revision
-        return http_GET(makeurl(self.apiurl,
-                                ['source', project, package], opts)).read()
+        return self.cached_GET(makeurl(self.apiurl,
+                                ['source', project, package], opts))
+
     def get_latest_request(self, project, package):
-        history = http_GET(makeurl(self.apiurl,
-                                   ['source', project, package, '_history'])).read()
+        history = self.cached_GET(makeurl(self.apiurl,
+                                   ['source', project, package, '_history']))
         root = ET.fromstring(history)
         requestid = None
         # latest commit's request - if latest commit is not a request, ignore the package
@@ -106,8 +115,7 @@ class UpdateCrawler(object):
         return requestid.text
 
     def get_request_infos(self, requestid):
-        request = http_GET(makeurl(self.apiurl,
-                                   ['request', requestid])).read()
+        request = self.cached_GET(makeurl(self.apiurl, ['request', requestid]))
         root = ET.fromstring(request)
         action = root.find('.//action')
         source = action.find('source')
@@ -131,7 +139,7 @@ class UpdateCrawler(object):
                 # not existant package is ok, we delete them all
                 pass
             else:
-                # If the package was there bug could not be delete, raise the error
+                # If the package was there but could not be delete, raise the error
                 raise
 
     # copied from stagingapi - but the dependencies are too heavy
@@ -174,6 +182,8 @@ class UpdateCrawler(object):
         self.upload_link(targetprj, targetpkg, link)
 
         for package in [ p for p in packages if p != targetpkg ]:
+            # FIXME: link packages from factory that override sle
+            # ones to different projecg
             logging.debug("linking %s -> %s", package, targetpkg)
             link = "<link package='{}' cicount='copy' />".format(targetpkg)
             self.create_package_container(targetprj, package)
@@ -217,9 +227,12 @@ class UpdateCrawler(object):
         return left_packages
 
     def check_source_in_project(self, project, package, verifymd5):
+        if not package in self.packages[project]:
+            return None
+
         try:
-            his = http_GET(makeurl(self.apiurl,
-                                   ['source', project, package, '_history'])).read()
+            his = self.cached_GET(makeurl(self.apiurl,
+                                   ['source', project, package, '_history']))
         except urllib2.HTTPError:
             return None
 
@@ -230,8 +243,8 @@ class UpdateCrawler(object):
         revs.reverse()
         for i in range(min(len(revs), 5)): # check last 5 commits
             srcmd5=revs.pop(0)
-            root = http_GET(makeurl(self.apiurl,
-                                    ['source', project, package], { 'rev': srcmd5, 'view': 'info'})).read()
+            root = self.cached_GET(makeurl(self.apiurl,
+                                    ['source', project, package], { 'rev': srcmd5, 'view': 'info'}))
             root = ET.fromstring(root)
             if root.get('verifymd5') == verifymd5:
                 return srcmd5
@@ -247,12 +260,17 @@ class UpdateCrawler(object):
                 logging.warn("link mismatch: %s <> %s, subpackage?", linked.get('package'), package)
                 continue
 
+            logging.debug("check where %s came", package)
+            foundit = False
             for project in self.project_preference_order:
-                logging.debug("check whether %s came from %s", package, project)
                 srcmd5 = self.check_source_in_project(project, package, root.get('verifymd5'))
                 if srcmd5:
+                    logging.debug('%s -> %s', package, project)
                     self.link_packages([ package ], project, package, srcmd5, self.project_mapping[project], package)
+                    foundit = True
                     break
+            if not foundit:
+                logging.debug('%s is a fork', package)
 
     def check_inner_link(self, project, package, link):
         if not link.get('cicount'):
@@ -262,8 +280,8 @@ class UpdateCrawler(object):
 
     def get_link(self, project, package):
         try:
-            link = http_GET(makeurl(self.apiurl,
-                                    ['source', project, package, '_link'])).read()
+            link = self.cached_GET(makeurl(self.apiurl,
+                                    ['source', project, package, '_link']))
         except urllib2.HTTPError:
             return None
         return ET.fromstring(link)
@@ -282,8 +300,8 @@ class UpdateCrawler(object):
         opts = { 'view': 'info' }
         if rev:
             opts['rev'] = rev
-        root = http_GET(makeurl(self.apiurl,
-                                ['source', link.get('project'), link.get('package')], opts )).read()
+        root = self.cached_GET(makeurl(self.apiurl,
+                                ['source', link.get('project'), link.get('package')], opts ))
         root = ET.fromstring(root)
         self.link_packages([package], link.get('project'), link.get('package'), root.get('srcmd5'), project, package)
 
@@ -312,7 +330,7 @@ class UpdateCrawler(object):
 
     def freeze_candidates(self):
         url = makeurl(self.apiurl, ['source', 'openSUSE:Factory'], { 'view': 'info' } )
-        root = ET.fromstring(http_GET(url).read())
+        root = ET.fromstring(self.cached_GET(url))
 
         flink = ET.Element('frozenlinks')
         fl = ET.SubElement(flink, 'frozenlink', {'project': 'openSUSE:Factory'})
@@ -335,7 +353,7 @@ class UpdateCrawler(object):
     def check_multiple_specs(self, project):
         for package in self.packages[project]:
             url = makeurl(self.apiurl, ['source', project, package], { 'expand': '1' } )
-            root = ET.fromstring(http_GET(url).read())
+            root = ET.fromstring(self.cached_GET(url))
             files = [ entry.get('name').replace('.spec', '') for entry in root.findall('entry') if entry.get('name').endswith('.spec') ]
             if len(files) == 1:
                 continue
@@ -362,7 +380,7 @@ def main(args):
     osc.conf.get_config(override_apiurl=args.apiurl)
     osc.conf.config['debug'] = args.debug
 
-    uc = UpdateCrawler(args.from_prj)
+    uc = UpdateCrawler(args.from_prj, caching = args.no_cache )
     uc.check_dups()
     if not args.skip_sanity_checks:
         for prj in uc.subprojects:
@@ -387,6 +405,8 @@ if __name__ == '__main__':
                         help='don\'t do slow check for broken links (only for testing)')
     parser.add_argument('-n', '--dry', action='store_true',
                         help='dry run, no POST, PUT, DELETE')
+    parser.add_argument('--no-cache', action='store_false', default=True,
+                        help='do not cache GET requests')
     parser.add_argument("package", nargs='*', help="package to check")
 
     args = parser.parse_args()
@@ -404,3 +424,5 @@ if __name__ == '__main__':
         http_DELETE = dryrun('DELETE')
 
     sys.exit(main(args))
+
+# vim:sw=4 et
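
One caveat about the caching change above: as committed, cached_GET calls _cached_GET(url) without the self. prefix, so the cached code path raises a NameError as soon as caching is enabled. The intended all-or-nothing pattern is roughly the following self-contained sketch (the dict-based memo merely stands in for the repository's @memoize() decorator and for http_GET; it is not the actual osclib implementation):

    # Sketch of an all-or-nothing GET cache; _raw_GET stands in for
    # http_GET(url).read() and the dict stands in for @memoize().
    class Crawler(object):
        def __init__(self, caching=True):
            self.caching = caching
            self._get_cache = {}

        def _raw_GET(self, url):
            return 'response for %s' % url   # placeholder for the real HTTP call

        def _cached_GET(self, url):
            if url not in self._get_cache:
                self._get_cache[url] = self._raw_GET(url)
            return self._get_cache[url]

        def cached_GET(self, url):
            if self.caching:
                return self._cached_GET(url)  # note the self. prefix
            return self._raw_GET(url)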

From 8e51db54d4976b62e2ceee729c42ee43dccb9750 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel
Date: Mon, 17 Aug 2015 13:35:00 +0200
Subject: [PATCH 3/6] missing use of self.from_prj

---
 manager_42.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/manager_42.py b/manager_42.py
index f321aa18..5edb39b0 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -173,9 +173,9 @@ class UpdateCrawler(object):
     def link_packages(self, packages, sourceprj, sourcepkg, sourcerev, targetprj, targetpkg):
         logging.info("update link %s/%s -> %s/%s@%s [%s]", targetprj, targetpkg, sourceprj, sourcepkg, sourcerev, ','.join(packages))
 
-        self.remove_packages('openSUSE:42:SLE12-Picks', packages)
-        self.remove_packages('openSUSE:42:Factory-Copies', packages)
-        self.remove_packages('openSUSE:42:SLE-Pkgs-With-Overwrites', packages)
+        self.remove_packages('%s:SLE12-Picks'%self.from_prj, packages)
+        self.remove_packages('%s:Factory-Copies'%self.from_prj, packages)
+        self.remove_packages('%s:SLE-Pkgs-With-Overwrites'%self.from_prj, packages)
 
         self.create_package_container(targetprj, targetpkg)
         link = self._link_content(sourceprj, sourcepkg, sourcerev)
@@ -347,8 +347,11 @@ class UpdateCrawler(object):
                 'srcmd5': package.get('srcmd5'),
                 'vrev': package.get('vrev') })
-        url = makeurl(self.apiurl, ['source', 'openSUSE:42:Factory-Candidates-Check', '_project', '_frozenlinks'], {'meta': '1'})
-        http_PUT(url, data=ET.tostring(flink))
+        url = makeurl(self.apiurl, ['source', '%s:Factory-Candidates-Check'%self.from_prj, '_project', '_frozenlinks'], {'meta': '1'})
+        try:
+            http_PUT(url, data=ET.tostring(flink))
+        except urllib2.HTTPError, err:
+            logging.error(err)
 
     def check_multiple_specs(self, project):

From 8d60335b02657e88157651b455dcd894cae32d55 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel
Date: Mon, 17 Aug 2015 13:23:57 +0200
Subject: [PATCH 4/6] add --no-update-candidates

---
 manager_42.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/manager_42.py b/manager_42.py
index 5edb39b0..a6df71b9 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -393,7 +393,8 @@ def main(args):
     if not args.skip_sanity_checks:
         for prj in uc.projects:
             uc.find_invalid_links(prj)
-    uc.freeze_candidates()
+    if args.no_update_candidates == False:
+        uc.freeze_candidates()
 
 if __name__ == '__main__':
     description = 'maintain sort openSUSE:42 packages into subprojects'
@@ -410,6 +411,8 @@ if __name__ == '__main__':
                         help='dry run, no POST, PUT, DELETE')
     parser.add_argument('--no-cache', action='store_false', default=True,
                         help='do not cache GET requests')
+    parser.add_argument('--no-update-candidates', action='store_true',
+                        help='don\'t update Factory candidates project')
     parser.add_argument("package", nargs='*', help="package to check")
 
     args = parser.parse_args()
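
Since the two options read like double negatives, it may help to see what the parsed values actually are: with action='store_false', args.no_cache defaults to True and only becomes False when --no-cache is given, which is why main() can pass it straight through as the caching argument, while --no-update-candidates uses store_true and main() skips freeze_candidates() when it is set. A small illustrative sketch (only the two option definitions are taken from the patches):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--no-cache', action='store_false', default=True,
                        help='do not cache GET requests')
    parser.add_argument('--no-update-candidates', action='store_true',
                        help='don\'t update Factory candidates project')

    args = parser.parse_args([])          # no options given
    # caching stays enabled, the candidates project gets updated
    print('%s %s' % (args.no_cache, args.no_update_candidates))   # True False

    args = parser.parse_args(['--no-cache', '--no-update-candidates'])
    # caching disabled, freeze_candidates() is skipped
    print('%s %s' % (args.no_cache, args.no_update_candidates))   # False True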

From 65e4b14ceaa74773991f9cc8131dbebace4e85d8 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel
Date: Mon, 17 Aug 2015 14:36:39 +0200
Subject: [PATCH 5/6] more debug

---
 manager_42.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/manager_42.py b/manager_42.py
index a6df71b9..225fb178 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -316,6 +316,7 @@ class UpdateCrawler(object):
         for project in self.projects:
             for package in self.packages[project]:
                 if package in mypackages:
+                    logging.debug("duplicate %s/%s, in %s", project, package, mypackages[package])
                     # TODO: detach only if actually a link to the deleted package
                     requestid = self.get_latest_request(self.from_prj, package)
                     if not requestid is None:
@@ -354,6 +355,7 @@ class UpdateCrawler(object):
             logging.error(err)
 
     def check_multiple_specs(self, project):
+        logging.debug("check for multiple specs in %s", project)
         for package in self.packages[project]:
             url = makeurl(self.apiurl, ['source', project, package], { 'expand': '1' } )
             root = ET.fromstring(self.cached_GET(url))

From 5aebdbf1f58e27ee1d9574ad3361471e2dc8248a Mon Sep 17 00:00:00 2001
From: Ludwig Nussel
Date: Mon, 17 Aug 2015 14:37:01 +0200
Subject: [PATCH 6/6] don't delete all packages :-)

---
 manager_42.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/manager_42.py b/manager_42.py
index 225fb178..8476fc62 100755
--- a/manager_42.py
+++ b/manager_42.py
@@ -373,8 +373,8 @@ class UpdateCrawler(object):
             files.remove(subpackage)
 
         for subpackage in files:
-            for prj in self.projects:
-                self.remove_packages(prj, self.packages[prj])
+            if subpackage in self.packages[project]:
+                self.remove_packages(project, [subpackage])
             link = "<link package='{}' cicount='copy' />".format(mainpackage)
             self.create_package_container(project, subpackage)
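
Both of the last two patches touch check_multiple_specs, which expands a package's source listing, derives one name per .spec file, and links every subpackage to the main package; patch 6 makes sure that only those derived subpackages are removed from the current project instead of wiping every project. A rough sketch of that derivation step (the XML listing and package names are invented for illustration):

    from xml.etree import ElementTree as ET

    # Invented example of an expanded source listing with two .spec files.
    listing = """
    <directory name="mypackage">
      <entry name="mypackage.spec"/>
      <entry name="mypackage-doc.spec"/>
      <entry name="mypackage.changes"/>
    </directory>
    """

    root = ET.fromstring(listing)
    files = [entry.get('name').replace('.spec', '')
             for entry in root.findall('entry')
             if entry.get('name').endswith('.spec')]
    mainpackage = 'mypackage'        # stands in for the script's mainpackage
    subpackages = [f for f in files if f != mainpackage]
    print(subpackages)               # ['mypackage-doc'] -> each gets a link container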