From e6a401d8acef1cd50c2bda9dd5030d3c3282d1f9 Mon Sep 17 00:00:00 2001 From: Stephan Kulow Date: Tue, 1 Nov 2022 11:34:10 +0100 Subject: [PATCH] Remove old history handling --- git-importer.py | 16 +-- lib/history.py | 73 ------------ lib/importer.py | 238 +--------------------------------------- lib/obs_revision.py | 47 +------- tests/db_import_test.py | 4 +- 5 files changed, 7 insertions(+), 371 deletions(-) delete mode 100644 lib/history.py diff --git a/git-importer.py b/git-importer.py index 2f763b8..a50e393 100755 --- a/git-importer.py +++ b/git-importer.py @@ -51,18 +51,6 @@ def main(): type=pathlib.Path, help="Local git repository directory", ) - parser.add_argument( - "-a", - "--search-ancestor", - action="store_true", - help="Search closest ancestor candidate for initial commit", - ) - parser.add_argument( - "-d", - "--rebase-devel", - action="store_true", - help="The devel project with be rebased after a merge", - ) parser.add_argument( "-g", "--gc", @@ -105,9 +93,7 @@ def main(): args.repodir = pathlib.Path(args.package) # TODO: use a CLI parameter to describe the projects - importer = Importer( - PROJECTS, args.package, args.repodir, args.search_ancestor, args.rebase_devel - ) + importer = Importer(PROJECTS, args.package, args.repodir) importer.set_gc_interval(args.gc) importer.import_into_db() importer.export_as_git() diff --git a/lib/history.py b/lib/history.py deleted file mode 100644 index 1feab02..0000000 --- a/lib/history.py +++ /dev/null @@ -1,73 +0,0 @@ -import itertools -import logging -import re - -from lib.obs_revision import OBSRevision - - -class History: - """Store the history of revisions of a package in different - projects. - - """ - - def __init__(self, obs, package): - self.obs = obs - self.package = package - - self.revisions = {} - - def __contains__(self, project): - return project in self.revisions - - def __getitem__(self, project): - return self.revisions[project] - - def _extract_copypac(self, comment): - original_project = re.findall( - r"osc copypac from project:(.*) package:", comment - ) - return original_project[0] if original_project else None - - def sort_all_revisions(self): - """Sort revisions for all projects, from older to newer""" - return sorted(itertools.chain(*self.revisions.values()), key=lambda x: x.time) - - def find_revision(self, project, revisionid, accepted_at): - last_commited_revision = None - for r in self.revisions.get(project, []): - logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}") - if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid: - if r.ignored: - logging.debug( - f"{r} fits but is ignored, returning {last_commited_revision}" - ) - return last_commited_revision - else: - logging.debug(f"{r} fits") - return r - if r.time > accepted_at: - # if we can't find the right revision, we take the last - # commit. Before ~2012 the data was tracked really loosely - # (e.g. using different timezones and the state field was - # only introduced in 2016...) - logging.warning( - f"Deploying workaround for missing request revision - returning {last_commited_revision}" - ) - return last_commited_revision - if r.commit: - last_commited_revision = r - logging.info("No commited revision found, returning None") - return None - - def find_last_rev_after_time(self, project, time): - # revs = self.projects.get(project, []) - # return next((r for r in reversed(revs) if r.time <= time), None) - prev = None - for rev in self.revisions.get(project, []): - if rev.time > time: - return prev - if rev.time == time: - return rev - prev = rev - return prev diff --git a/lib/importer.py b/lib/importer.py index f6884ce..85f90e7 100644 --- a/lib/importer.py +++ b/lib/importer.py @@ -9,27 +9,14 @@ from lib.binary import is_binary_or_large from lib.db import DB from lib.db_revision import DBRevision from lib.git import Git -from lib.history import History from lib.obs import OBS from lib.obs_revision import OBSRevision from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode from lib.user import User - -def _files_hash(hash_alg, dirpath): - """List of (filepath, md5) for a directory""" - # TODO: do it async or multythread - files = [f for f in dirpath.iterdir() if f.is_file()] - return [(f.parts[-1], hash_alg(f)) for f in files] - - -files_md5 = functools.partial(_files_hash, md5) -files_sha256 = functools.partial(_files_hash, sha256) - - class Importer: - def __init__(self, projects, package, repodir, search_ancestor, rebase_devel): + def __init__(self, projects, package, repodir): # The idea is to create each commit in order, and draw the # same graph described by the revisions timeline. For that we # need first to fetch all the revisions and sort them @@ -49,8 +36,6 @@ class Importer: # (could be moved), and "factory" is not the root. self.package = package - self.search_ancestor = search_ancestor - self.rebase_devel = rebase_devel self.obs = OBS() self.git = Git( @@ -62,8 +47,6 @@ class Importer: self.proxy_sha256 = ProxySHA256(self.obs, enabled=True) self.gc_interval = 200 - self.history = History(self.obs, self.package) - # Add the "devel" project (project, branch, api_url) = projects[0] assert project == "openSUSE:Factory" @@ -140,7 +123,7 @@ class Importer: return latest = DBRevision.latest_revision(db, project, package) for r in root.findall("revision"): - rev = OBSRevision(self.obs, self, project, package).parse(r) + rev = OBSRevision(self.obs, project, package).parse(r) if not latest or rev.rev > latest.rev: dbrev = DBRevision.import_obs_rev(db, rev) try: @@ -289,7 +272,7 @@ class Importer: """While walking the tree, record the commits to do one after the other. These FlatNodes are in the end in the flats array.""" - def __init__(self, rebase_devel) -> None: + def __init__(self, rebase_devel=False) -> None: super().__init__() self.flats = [] # the rebase_devel won't work as such as rebasing the branch needs an explicit action @@ -325,7 +308,7 @@ class Importer: self.last_merge = node - ftw = FlatTreeWalker(self.rebase_devel) + ftw = FlatTreeWalker() tree.walk(ftw) branch_state = {"factory": None, "devel": None} state_data = dict() @@ -452,216 +435,3 @@ class Importer: self.obs.request(number).import_into_db(db) db.conn.commit() - - def import_new_revision_with_request(self, revision, request): - """Create a new branch as a result of a merge""" - - submitted_revision = self.history.find_revision( - request.source, request.revisionid, revision.time - ) - if not submitted_revision: - logging.warning(f"Request {request} does not connect to a known revision") - return False - - if not submitted_revision.commit: - # If the revision appointed by the request is not part of - # the git history, we can have an ordering problem. One - # example is "premake4". - self.import_revision(submitted_revision) - - assert submitted_revision.commit is not None - - project = revision.project - branch, _ = self.projects_info[project] - - # TODO: add an empty commit marking the acceptenace of the request (see discussion in PR 2858) - self.git.branch(branch, submitted_revision.commit) - self.git.clean() - self.git.checkout(branch) - - logging.info(f"Create new branch based on {submitted_revision.commit}") - revision.commit = submitted_revision.commit - - def _rebase_branch_history(self, project, revision): - branch, _ = self.projects_info[project] - history = self.history[project] - revision_index = history.index(revision) - for index in range(revision_index + 1, len(history)): - revision = history[index] - # We are done when we have one non-commited revision - if not revision.commit: - return - logging.info(f"Rebasing {revision} from {branch}") - revision.commit = None - self.import_revision(revision) - - def import_revision_with_request(self, revision, request): - """Import a single revision via a merge""" - - submitted_revision = self.history.find_revision( - request.source, request.revisionid, revision.time - ) - if not submitted_revision: - logging.warning(f"Request {request} does not connect to a known revision") - return False - assert submitted_revision.commit is not None - - # TODO: detect a revision, case in point - # Base:System/bash/284 -> rq683701 -> accept O:F/151 - # -> autocommit Base:System/bash/285 - # Revert lead to openSUSE:Factory/bash/152 - # Base:System/286 restored the reverted code in devel project - # rq684575 was created and accepted as O:F/153 - # But the 284-285 and the 285-286 changeset is seen as empty - # as the revert was never in Base:System, so the - # submitted_revision of 684575 has no commit - if submitted_revision.commit == "EMPTY": - logging.warning("Empty commit submitted?!") - return False - - message = ( - f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}" - ) - commit = self.git.merge( - # TODO: revision.userid or request.creator? - f"OBS User {revision.userid}", - "null@suse.de", - revision.time, - message, - submitted_revision.commit, - ) - - if commit == "EMPTY": - logging.warning("Empty merge. Ignoring the revision and the request") - self.git.merge_abort() - revision.commit = commit - return False - - if commit == "CONFLICT": - logging.info("Merge conflict. Downloading revision") - self.download(revision) - message = f"CONFLICT {message}" - commit = self.git.merge( - f"OBS User {revision.userid}", - "null@suse.de", - revision.time, - message, - submitted_revision.commit, - merged=True, - ) - - assert commit and commit != "CONFLICT" - logging.info(f"Merge with {submitted_revision.commit} into {commit}") - revision.commit = commit - - # TODO: There are more checks to do, like for example, the - # last commit into the non-devel branch should be a merge from - # the devel branch - if self.rebase_devel: - branch, _ = self.projects_info.get(request.source, (None, None)) - if branch == "devel": - self.git.repo.references[f"refs/heads/{branch}"].set_target(commit) - self._rebase_branch_history(request.source, submitted_revision) - - return True - - def matching_request(self, revision): - request = self.obs.request(revision.requestid) - if not request: - return None - - # to be handled by the caller - if request.type() != "submit": - return request - - if request.source not in self.projects_info: - logging.info("Request from a non exported project") - return None - - if request.target != revision.project: - # This seems to happen when the devel project gets - # reinitialized (for example, SR#943593 in 7zip, or - # SR#437901 in ColorFull) - logging.info("Request target different from current project") - return None - - if request.source == request.target: - # this is not a merge, but a different way to do a - # contribution to the (devel) project - see bindfs's rev 1 - logging.info("Request within the same project") - return None - - return request - - def import_revision(self, revision): - """Import a single revision into git""" - project = revision.project - branch, api_url = self.projects_info[project] - - logging.info(f"Importing [{revision}] to {branch}") - - self.obs.change_url(api_url) - - # Populate linkrev and replace srcmd5 from the linked - # revision. If the expansion fails, the revision will be ignored - # and not imported. - if not revision.check_expanded(): - logging.warning(f"Broken revision") - revision.ignored = True - return - - # When doing a SR, we see also a revision in the origin - # project with the outgoing request, but without changes in - # the project. We can ignore them. - # - # If there is a request ID, it will be filtered out later, - # when the target project is different from itself. - if revision.userid == "autobuild" and not revision.requestid: - logging.info("Ignoring autocommit") - revision.ignored = True - return - - if revision.userid == "buildservice-autocommit": - logging.info("Ignoring autocommit") - revision.ignored = True - return - - # Create the reference if the branch is new. If so return - # True. - new_branch = self.git.checkout(branch) - - if revision.requestid: - request = self.matching_request(revision) - if request: - if request.type() == "delete": - # TODO: after this comes a restore, this should be collapsed - # before even hitting git - logging.info("Delete request ignored") - revision.ignored = True - return - - logging.debug(f"Found matching request: #{revision.project} #{request}") - if new_branch: - self.import_new_revision_with_request(revision, request) - return - if self.import_revision_with_request(revision, request): - return - - # Import revision as a single commit (without merging) - self.download(revision) - - if new_branch or self.git.is_dirty(): - commit = self.git.commit( - f"OBS User {revision.userid}", - "null@suse.de", - revision.time, - # TODO: Normalize better the commit message - f"{revision.comment}\n\n{revision}", - # Create an empty commit only if is a new branch - allow_empty=new_branch, - ) - revision.commit = commit - logging.info(f"Commit {commit}") - else: - logging.info("Skip empty commit") - revision.ignored = True diff --git a/lib/obs_revision.py b/lib/obs_revision.py index eafcdbb..30474c1 100644 --- a/lib/obs_revision.py +++ b/lib/obs_revision.py @@ -1,14 +1,12 @@ import datetime import logging import re -import xml.etree.ElementTree as ET from urllib.error import HTTPError class OBSRevision: - def __init__(self, obs, history, project, package): + def __init__(self, obs, project, package): self.obs = obs - self.history = history self.project = project self.package = package @@ -72,46 +70,3 @@ class OBSRevision: logging.debug("No _link for the revision") return None raise e - - def check_link(self): - """Add 'linkrev' attribute into the revision. Returns False if the link is invalid""" - try: - root = self.read_link() - if root is None: - return True - target_project = root.get("project") - except ET.ParseError: - logging.error( - f"_link can't be parsed [{self.project}/{self.package} rev={self.unexpanded_srcmd5}]" - ) - return False - - rev = self.history.find_last_rev_after_time(target_project, self.time) - if rev: - logging.debug(f"Linkrev found: {rev}") - self.linkrev = rev.srcmd5 - return True - - def check_expanded(self): - # Even if it's not a link we still need to check the expanded - # srcmd5 as it's possible used in submit requests - if not self.check_link(): - return False - - # If there is a "linkrev", "rev" is ignored - params = {"rev": self.srcmd5, "expand": "1"} - if self.linkrev: - params["linkrev"] = self.linkrev - - try: - root = self.obs._xml(f"source/{self.project}/{self.package}", **params) - except HTTPError as e: - if e.code == 400: - logging.error( - f"Package [{self.project}/{self.package} {params}] can't be expanded: {e}" - ) - return False - raise e - - self.srcmd5 = root.get("srcmd5") - return True diff --git a/tests/db_import_test.py b/tests/db_import_test.py index f84f46c..6545e72 100644 --- a/tests/db_import_test.py +++ b/tests/db_import_test.py @@ -3,7 +3,6 @@ import xml.etree.ElementTree as ET from lib.db import DB from lib.db_revision import DBRevision -from lib.history import History from lib.obs import OBS from lib.obs_revision import OBSRevision @@ -12,10 +11,9 @@ class TestDBMethods(unittest.TestCase): def setUp(self): self.db = DB(section="test") self.obs = OBS() - self.history = History(self.obs, "xz") def test_import(self): - test_rev = OBSRevision(self.obs, self.history, "openSUSE:Factory", "xz") + test_rev = OBSRevision(self.obs, "openSUSE:Factory", "xz") test_rev.parse( ET.fromstring( """