Remove old history handling

This commit is contained in:
Stephan Kulow 2022-11-01 11:34:10 +01:00
parent 9ed8abad2b
commit e6a401d8ac
5 changed files with 7 additions and 371 deletions

View File

@ -51,18 +51,6 @@ def main():
type=pathlib.Path, type=pathlib.Path,
help="Local git repository directory", help="Local git repository directory",
) )
parser.add_argument(
"-a",
"--search-ancestor",
action="store_true",
help="Search closest ancestor candidate for initial commit",
)
parser.add_argument(
"-d",
"--rebase-devel",
action="store_true",
help="The devel project with be rebased after a merge",
)
parser.add_argument( parser.add_argument(
"-g", "-g",
"--gc", "--gc",
@ -105,9 +93,7 @@ def main():
args.repodir = pathlib.Path(args.package) args.repodir = pathlib.Path(args.package)
# TODO: use a CLI parameter to describe the projects # TODO: use a CLI parameter to describe the projects
importer = Importer( importer = Importer(PROJECTS, args.package, args.repodir)
PROJECTS, args.package, args.repodir, args.search_ancestor, args.rebase_devel
)
importer.set_gc_interval(args.gc) importer.set_gc_interval(args.gc)
importer.import_into_db() importer.import_into_db()
importer.export_as_git() importer.export_as_git()

View File

@ -1,73 +0,0 @@
import itertools
import logging
import re
from lib.obs_revision import OBSRevision
class History:
    """Store the history of revisions of a package in different
    projects.

    ``revisions`` maps a project name to the list of revision objects
    known for that project.  The lookup helpers below walk those lists
    front to back and rely on them being ordered from older to newer.
    """

    def __init__(self, obs, package):
        """Remember the OBS handle and package name; start with no revisions."""
        self.obs = obs
        self.package = package
        self.revisions = {}

    def __contains__(self, project):
        """Return True when revisions are recorded for ``project``."""
        return project in self.revisions

    def __getitem__(self, project):
        """Return the revision list of ``project`` (KeyError if unknown)."""
        return self.revisions[project]

    def _extract_copypac(self, comment):
        """Return the source project named in an ``osc copypac`` comment.

        Returns None when ``comment`` carries no copypac marker.
        """
        # Non-greedy on purpose: the project name ends at the FIRST
        # " package:" marker.  A greedy match would swallow everything up
        # to the last " package:" that happens to appear in the comment.
        found = re.search(r"osc copypac from project:(.*?) package:", comment)
        return found.group(1) if found else None

    def sort_all_revisions(self):
        """Sort revisions for all projects, from older to newer"""
        return sorted(
            itertools.chain.from_iterable(self.revisions.values()),
            key=lambda x: x.time,
        )

    def find_revision(self, project, revisionid, accepted_at):
        """Find the revision of ``project`` identified by ``revisionid``.

        ``revisionid`` is matched against the revision number (compared
        as strings) or against the srcmd5.  When the match is flagged as
        ignored, or when a revision newer than ``accepted_at`` is reached
        before any match, the last revision seen so far that has a commit
        is returned instead (possibly None).  Returns None when the
        project is unknown or nothing matches.
        """
        last_commited_revision = None
        for r in self.revisions.get(project, []):
            logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}")
            if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid:
                if r.ignored:
                    logging.debug(
                        f"{r} fits but is ignored, returning {last_commited_revision}"
                    )
                    return last_commited_revision
                logging.debug(f"{r} fits")
                return r
            if r.time > accepted_at:
                # if we can't find the right revision, we take the last
                # commit. Before ~2012 the data was tracked really loosely
                # (e.g. using different timezones and the state field was
                # only introduced in 2016...)
                logging.warning(
                    f"Deploying workaround for missing request revision - returning {last_commited_revision}"
                )
                return last_commited_revision
            if r.commit:
                last_commited_revision = r
        logging.info("No commited revision found, returning None")
        return None

    def find_last_rev_after_time(self, project, time):
        """Return the newest revision of ``project`` not newer than ``time``.

        Despite the method name, the result is the last revision whose
        timestamp is less than or equal to ``time``; None when every
        revision is newer or the project is unknown.
        """
        prev = None
        for rev in self.revisions.get(project, []):
            if rev.time > time:
                return prev
            if rev.time == time:
                return rev
            prev = rev
        return prev

View File

@ -9,27 +9,14 @@ from lib.binary import is_binary_or_large
from lib.db import DB from lib.db import DB
from lib.db_revision import DBRevision from lib.db_revision import DBRevision
from lib.git import Git from lib.git import Git
from lib.history import History
from lib.obs import OBS from lib.obs import OBS
from lib.obs_revision import OBSRevision from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256 from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode
from lib.user import User from lib.user import User
def _files_hash(hash_alg, dirpath):
    """Pair each regular file directly inside ``dirpath`` with its hash.

    ``hash_alg`` is called once per file with the file's path object.
    Returns a list of ``(file name, hash_alg(path))`` tuples in the order
    the directory yields its entries; sub-directories are skipped and not
    descended into.
    """
    # TODO: hash asynchronously or with multiple threads
    regular_files = list(filter(lambda entry: entry.is_file(), dirpath.iterdir()))
    return [(entry.name, hash_alg(entry)) for entry in regular_files]
# Directory hashers: _files_hash with the hash function pre-bound to the
# md5 / sha256 helpers imported from lib.proxy_sha256.
files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)
class Importer: class Importer:
def __init__(self, projects, package, repodir, search_ancestor, rebase_devel): def __init__(self, projects, package, repodir):
# The idea is to create each commit in order, and draw the # The idea is to create each commit in order, and draw the
# same graph described by the revisions timeline. For that we # same graph described by the revisions timeline. For that we
# need first to fetch all the revisions and sort them # need first to fetch all the revisions and sort them
@ -49,8 +36,6 @@ class Importer:
# (could be moved), and "factory" is not the root. # (could be moved), and "factory" is not the root.
self.package = package self.package = package
self.search_ancestor = search_ancestor
self.rebase_devel = rebase_devel
self.obs = OBS() self.obs = OBS()
self.git = Git( self.git = Git(
@ -62,8 +47,6 @@ class Importer:
self.proxy_sha256 = ProxySHA256(self.obs, enabled=True) self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
self.gc_interval = 200 self.gc_interval = 200
self.history = History(self.obs, self.package)
# Add the "devel" project # Add the "devel" project
(project, branch, api_url) = projects[0] (project, branch, api_url) = projects[0]
assert project == "openSUSE:Factory" assert project == "openSUSE:Factory"
@ -140,7 +123,7 @@ class Importer:
return return
latest = DBRevision.latest_revision(db, project, package) latest = DBRevision.latest_revision(db, project, package)
for r in root.findall("revision"): for r in root.findall("revision"):
rev = OBSRevision(self.obs, self, project, package).parse(r) rev = OBSRevision(self.obs, project, package).parse(r)
if not latest or rev.rev > latest.rev: if not latest or rev.rev > latest.rev:
dbrev = DBRevision.import_obs_rev(db, rev) dbrev = DBRevision.import_obs_rev(db, rev)
try: try:
@ -289,7 +272,7 @@ class Importer:
"""While walking the tree, record the commits to do one after the other. These """While walking the tree, record the commits to do one after the other. These
FlatNodes are in the end in the flats array.""" FlatNodes are in the end in the flats array."""
def __init__(self, rebase_devel) -> None: def __init__(self, rebase_devel=False) -> None:
super().__init__() super().__init__()
self.flats = [] self.flats = []
# the rebase_devel won't work as such as rebasing the branch needs an explicit action # the rebase_devel won't work as such as rebasing the branch needs an explicit action
@ -325,7 +308,7 @@ class Importer:
self.last_merge = node self.last_merge = node
ftw = FlatTreeWalker(self.rebase_devel) ftw = FlatTreeWalker()
tree.walk(ftw) tree.walk(ftw)
branch_state = {"factory": None, "devel": None} branch_state = {"factory": None, "devel": None}
state_data = dict() state_data = dict()
@ -452,216 +435,3 @@ class Importer:
self.obs.request(number).import_into_db(db) self.obs.request(number).import_into_db(db)
db.conn.commit() db.conn.commit()
def import_new_revision_with_request(self, revision, request):
    """Start the target branch at the commit of the submitted revision.

    Looks up the revision that ``request`` submitted from its source
    project, imports it first if it has no commit yet, then creates and
    checks out the branch of ``revision.project`` at that commit and
    records the commit on ``revision``.

    Returns False when the request cannot be tied to a known revision;
    otherwise falls off the end (returns None).
    """
    source_rev = self.history.find_revision(
        request.source, request.revisionid, revision.time
    )
    if not source_rev:
        logging.warning(f"Request {request} does not connect to a known revision")
        return False

    if not source_rev.commit:
        # The revision the request points at is not in the git history
        # yet, which hints at an ordering problem ("premake4" is one
        # example).  Import it now so there is a commit to branch from.
        self.import_revision(source_rev)
    assert source_rev.commit is not None

    target_branch, _ = self.projects_info[revision.project]
    base_commit = source_rev.commit

    # TODO: add an empty commit marking the acceptance of the request
    # (see discussion in PR 2858)
    self.git.branch(target_branch, base_commit)
    self.git.clean()
    self.git.checkout(target_branch)

    logging.info(f"Create new branch based on {base_commit}")
    revision.commit = base_commit
def _rebase_branch_history(self, project, revision):
    """Re-import the committed revisions that follow ``revision``.

    Walks the history of ``project`` starting right after ``revision``.
    Each following revision that already has a commit gets that commit
    cleared and is imported again; the walk stops at the first revision
    without a commit.
    """
    branch_name, _ = self.projects_info[project]
    timeline = self.history[project]
    first_later = timeline.index(revision) + 1

    for position in range(first_later, len(timeline)):
        later = timeline[position]
        if not later.commit:
            # The first non-committed revision ends the walk
            return

        logging.info(f"Rebasing {later} from {branch_name}")
        later.commit = None
        self.import_revision(later)
def import_revision_with_request(self, revision, request):
    """Import a single revision via a merge

    Merges the commit of the revision that ``request`` submitted into the
    currently checked out branch and stores the resulting commit on
    ``revision``.

    Returns True when a merge commit was recorded.  Returns False when
    the request does not lead to a known revision, when the submitted
    revision's commit is the "EMPTY" marker, or when the merge itself
    comes back as "EMPTY" (the merge is aborted in that case).
    """
    submitted_revision = self.history.find_revision(
        request.source, request.revisionid, revision.time
    )
    if not submitted_revision:
        logging.warning(f"Request {request} does not connect to a known revision")
        return False
    assert submitted_revision.commit is not None

    # TODO: detect a revision, case in point
    # Base:System/bash/284 -> rq683701 -> accept O:F/151
    #   -> autocommit Base:System/bash/285
    # Revert lead to openSUSE:Factory/bash/152
    # Base:System/286 restored the reverted code in devel project
    # rq684575 was created and accepted as O:F/153
    # But the 284-285 and the 285-286 changeset is seen as empty
    # as the revert was never in Base:System, so the
    # submitted_revision of 684575 has no commit
    if submitted_revision.commit == "EMPTY":
        logging.warning("Empty commit submitted?!")
        return False

    message = (
        f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
    )
    # First attempt: merge the submitted commit as-is.
    commit = self.git.merge(
        # TODO: revision.userid or request.creator?
        f"OBS User {revision.userid}",
        "null@suse.de",
        revision.time,
        message,
        submitted_revision.commit,
    )

    if commit == "EMPTY":
        logging.warning("Empty merge. Ignoring the revision and the request")
        self.git.merge_abort()
        # The "EMPTY" marker is stored on the revision before giving up.
        revision.commit = commit
        return False

    if commit == "CONFLICT":
        # Second attempt: download the revision's content, then call
        # merge again with merged=True and a "CONFLICT"-prefixed message.
        logging.info("Merge conflict. Downloading revision")
        self.download(revision)
        message = f"CONFLICT {message}"
        commit = self.git.merge(
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            message,
            submitted_revision.commit,
            merged=True,
        )

    # Whatever path was taken, a real commit must exist by now.
    assert commit and commit != "CONFLICT"
    logging.info(f"Merge with {submitted_revision.commit} into {commit}")
    revision.commit = commit

    # TODO: There are more checks to do, like for example, the
    # last commit into the non-devel branch should be a merge from
    # the devel branch
    if self.rebase_devel:
        # Unknown source projects yield (None, None), so branch is None.
        branch, _ = self.projects_info.get(request.source, (None, None))
        if branch == "devel":
            # Point the devel branch at the merge commit, then re-import
            # the devel revisions that follow the submitted one.
            self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
            self._rebase_branch_history(request.source, submitted_revision)

    return True
def matching_request(self, revision):
    """Return the request behind ``revision`` if it should drive the import.

    Non-"submit" requests are handed back untouched for the caller to
    deal with.  A submit request is returned only when its source is one
    of the exported projects, its target is the revision's project and
    source and target differ; in every other case (including a missing
    request) the result is None.
    """
    request = self.obs.request(revision.requestid)
    if not request:
        return None

    if request.type() != "submit":
        # to be handled by the caller
        return request

    if request.source not in self.projects_info:
        rejection = "Request from a non exported project"
    elif request.target != revision.project:
        # This seems to happen when the devel project gets
        # reinitialized (for example, SR#943593 in 7zip, or
        # SR#437901 in ColorFull)
        rejection = "Request target different from current project"
    elif request.source == request.target:
        # this is not a merge, but a different way to do a
        # contribution to the (devel) project - see bindfs's rev 1
        rejection = "Request within the same project"
    else:
        return request

    logging.info(rejection)
    return None
def import_revision(self, revision):
    """Import a single revision into git

    The revision ends up in one of three states:

    * ``revision.ignored`` is set to True (link expansion failed,
      autocommit, delete request, or nothing changed on an existing
      branch);
    * ``revision.commit`` is set by one of the request based import
      helpers;
    * ``revision.commit`` is set to a plain commit created from the
      downloaded content.

    Always returns None.
    """
    project = revision.project
    branch, api_url = self.projects_info[project]

    logging.info(f"Importing [{revision}] to {branch}")
    self.obs.change_url(api_url)

    # Populate linkrev and replace srcmd5 from the linked
    # revision. If the expansion fails, the revision will be ignored
    # and not imported.
    if not revision.check_expanded():
        logging.warning("Broken revision")
        revision.ignored = True
        return

    # When doing a SR, we see also a revision in the origin
    # project with the outgoing request, but without changes in
    # the project. We can ignore them.
    #
    # If there is a request ID, it will be filtered out later,
    # when the target project is different from itself.
    is_autocommit = (
        revision.userid == "autobuild" and not revision.requestid
    ) or revision.userid == "buildservice-autocommit"
    if is_autocommit:
        logging.info("Ignoring autocommit")
        revision.ignored = True
        return

    # Create the reference if the branch is new. If so return
    # True.
    new_branch = self.git.checkout(branch)

    if revision.requestid:
        request = self.matching_request(revision)
        if request:
            if request.type() == "delete":
                # TODO: after this comes a restore, this should be collapsed
                # before even hitting git
                logging.info("Delete request ignored")
                revision.ignored = True
                return

            logging.debug(f"Found matching request: #{revision.project} #{request}")
            if new_branch:
                self.import_new_revision_with_request(revision, request)
                return
            if self.import_revision_with_request(revision, request):
                return
            # A falsy result means the merge was not possible; fall
            # through and import the revision as a plain commit.

    # Import revision as a single commit (without merging)
    self.download(revision)

    if new_branch or self.git.is_dirty():
        commit = self.git.commit(
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            # TODO: Normalize better the commit message
            f"{revision.comment}\n\n{revision}",
            # Create an empty commit only if is a new branch
            allow_empty=new_branch,
        )
        revision.commit = commit
        logging.info(f"Commit {commit}")
    else:
        logging.info("Skip empty commit")
        revision.ignored = True

View File

@ -1,14 +1,12 @@
import datetime import datetime
import logging import logging
import re import re
import xml.etree.ElementTree as ET
from urllib.error import HTTPError from urllib.error import HTTPError
class OBSRevision: class OBSRevision:
def __init__(self, obs, history, project, package): def __init__(self, obs, project, package):
self.obs = obs self.obs = obs
self.history = history
self.project = project self.project = project
self.package = package self.package = package
@ -72,46 +70,3 @@ class OBSRevision:
logging.debug("No _link for the revision") logging.debug("No _link for the revision")
return None return None
raise e raise e
def check_link(self):
    """Fill in ``linkrev`` for a linked revision.

    Reads the revision's ``_link``; when one exists, the link target
    project's history is asked for the revision matching ``self.time``
    and, if found, its srcmd5 is stored in ``self.linkrev``.

    Returns False only when the ``_link`` cannot be parsed, True in every
    other case (no link, link resolved, or no matching target revision).
    """
    try:
        link_root = self.read_link()
    except ET.ParseError:
        logging.error(
            f"_link can't be parsed [{self.project}/{self.package} rev={self.unexpanded_srcmd5}]"
        )
        return False

    if link_root is None:
        # Not a link: nothing to resolve
        return True

    target_project = link_root.get("project")
    linked = self.history.find_last_rev_after_time(target_project, self.time)
    if linked:
        logging.debug(f"Linkrev found: {linked}")
        self.linkrev = linked.srcmd5
    return True
def check_expanded(self):
    """Replace ``srcmd5`` with the srcmd5 of the expanded sources.

    Runs ``check_link()`` first, then queries
    ``source/<project>/<package>`` with ``expand=1`` (plus ``linkrev``
    when one is set) and stores the ``srcmd5`` attribute of the answer in
    ``self.srcmd5``.

    Returns:
        True on success; False when the link check fails or the server
        answers the expansion with HTTP 400.

    Raises:
        HTTPError: for any HTTP status other than 400.
    """
    # Even if it's not a link we still need to check the expanded
    # srcmd5 as it's possible used in submit requests
    if not self.check_link():
        return False

    # If there is a "linkrev", "rev" is ignored
    params = {"rev": self.srcmd5, "expand": "1"}
    if self.linkrev:
        params["linkrev"] = self.linkrev

    try:
        root = self.obs._xml(f"source/{self.project}/{self.package}", **params)
    except HTTPError as e:
        if e.code == 400:
            logging.error(
                f"Package [{self.project}/{self.package} {params}] can't be expanded: {e}"
            )
            return False
        # Anything else is unexpected: re-raise the active exception
        raise

    self.srcmd5 = root.get("srcmd5")
    return True

View File

@ -3,7 +3,6 @@ import xml.etree.ElementTree as ET
from lib.db import DB from lib.db import DB
from lib.db_revision import DBRevision from lib.db_revision import DBRevision
from lib.history import History
from lib.obs import OBS from lib.obs import OBS
from lib.obs_revision import OBSRevision from lib.obs_revision import OBSRevision
@ -12,10 +11,9 @@ class TestDBMethods(unittest.TestCase):
def setUp(self): def setUp(self):
self.db = DB(section="test") self.db = DB(section="test")
self.obs = OBS() self.obs = OBS()
self.history = History(self.obs, "xz")
def test_import(self): def test_import(self):
test_rev = OBSRevision(self.obs, self.history, "openSUSE:Factory", "xz") test_rev = OBSRevision(self.obs, "openSUSE:Factory", "xz")
test_rev.parse( test_rev.parse(
ET.fromstring( ET.fromstring(
"""<revision rev="70" vrev="1"> """<revision rev="70" vrev="1">