forked from importers/git-importer
Remove old history handling
This commit is contained in:
parent
9ed8abad2b
commit
e6a401d8ac
@ -51,18 +51,6 @@ def main():
|
|||||||
type=pathlib.Path,
|
type=pathlib.Path,
|
||||||
help="Local git repository directory",
|
help="Local git repository directory",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"-a",
|
|
||||||
"--search-ancestor",
|
|
||||||
action="store_true",
|
|
||||||
help="Search closest ancestor candidate for initial commit",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-d",
|
|
||||||
"--rebase-devel",
|
|
||||||
action="store_true",
|
|
||||||
help="The devel project with be rebased after a merge",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-g",
|
"-g",
|
||||||
"--gc",
|
"--gc",
|
||||||
@ -105,9 +93,7 @@ def main():
|
|||||||
args.repodir = pathlib.Path(args.package)
|
args.repodir = pathlib.Path(args.package)
|
||||||
|
|
||||||
# TODO: use a CLI parameter to describe the projects
|
# TODO: use a CLI parameter to describe the projects
|
||||||
importer = Importer(
|
importer = Importer(PROJECTS, args.package, args.repodir)
|
||||||
PROJECTS, args.package, args.repodir, args.search_ancestor, args.rebase_devel
|
|
||||||
)
|
|
||||||
importer.set_gc_interval(args.gc)
|
importer.set_gc_interval(args.gc)
|
||||||
importer.import_into_db()
|
importer.import_into_db()
|
||||||
importer.export_as_git()
|
importer.export_as_git()
|
||||||
|
@ -1,73 +0,0 @@
|
|||||||
import itertools
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
|
|
||||||
from lib.obs_revision import OBSRevision
|
|
||||||
|
|
||||||
|
|
||||||
class History:
|
|
||||||
"""Store the history of revisions of a package in different
|
|
||||||
projects.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, obs, package):
|
|
||||||
self.obs = obs
|
|
||||||
self.package = package
|
|
||||||
|
|
||||||
self.revisions = {}
|
|
||||||
|
|
||||||
def __contains__(self, project):
|
|
||||||
return project in self.revisions
|
|
||||||
|
|
||||||
def __getitem__(self, project):
|
|
||||||
return self.revisions[project]
|
|
||||||
|
|
||||||
def _extract_copypac(self, comment):
|
|
||||||
original_project = re.findall(
|
|
||||||
r"osc copypac from project:(.*) package:", comment
|
|
||||||
)
|
|
||||||
return original_project[0] if original_project else None
|
|
||||||
|
|
||||||
def sort_all_revisions(self):
|
|
||||||
"""Sort revisions for all projects, from older to newer"""
|
|
||||||
return sorted(itertools.chain(*self.revisions.values()), key=lambda x: x.time)
|
|
||||||
|
|
||||||
def find_revision(self, project, revisionid, accepted_at):
|
|
||||||
last_commited_revision = None
|
|
||||||
for r in self.revisions.get(project, []):
|
|
||||||
logging.debug(f"Find revision {revisionid} [{accepted_at}]: {r}")
|
|
||||||
if str(r.rev) == str(revisionid) or r.srcmd5 == revisionid:
|
|
||||||
if r.ignored:
|
|
||||||
logging.debug(
|
|
||||||
f"{r} fits but is ignored, returning {last_commited_revision}"
|
|
||||||
)
|
|
||||||
return last_commited_revision
|
|
||||||
else:
|
|
||||||
logging.debug(f"{r} fits")
|
|
||||||
return r
|
|
||||||
if r.time > accepted_at:
|
|
||||||
# if we can't find the right revision, we take the last
|
|
||||||
# commit. Before ~2012 the data was tracked really loosely
|
|
||||||
# (e.g. using different timezones and the state field was
|
|
||||||
# only introduced in 2016...)
|
|
||||||
logging.warning(
|
|
||||||
f"Deploying workaround for missing request revision - returning {last_commited_revision}"
|
|
||||||
)
|
|
||||||
return last_commited_revision
|
|
||||||
if r.commit:
|
|
||||||
last_commited_revision = r
|
|
||||||
logging.info("No commited revision found, returning None")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def find_last_rev_after_time(self, project, time):
|
|
||||||
# revs = self.projects.get(project, [])
|
|
||||||
# return next((r for r in reversed(revs) if r.time <= time), None)
|
|
||||||
prev = None
|
|
||||||
for rev in self.revisions.get(project, []):
|
|
||||||
if rev.time > time:
|
|
||||||
return prev
|
|
||||||
if rev.time == time:
|
|
||||||
return rev
|
|
||||||
prev = rev
|
|
||||||
return prev
|
|
238
lib/importer.py
238
lib/importer.py
@ -9,27 +9,14 @@ from lib.binary import is_binary_or_large
|
|||||||
from lib.db import DB
|
from lib.db import DB
|
||||||
from lib.db_revision import DBRevision
|
from lib.db_revision import DBRevision
|
||||||
from lib.git import Git
|
from lib.git import Git
|
||||||
from lib.history import History
|
|
||||||
from lib.obs import OBS
|
from lib.obs import OBS
|
||||||
from lib.obs_revision import OBSRevision
|
from lib.obs_revision import OBSRevision
|
||||||
from lib.proxy_sha256 import ProxySHA256, md5, sha256
|
from lib.proxy_sha256 import ProxySHA256, md5, sha256
|
||||||
from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode
|
from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode
|
||||||
from lib.user import User
|
from lib.user import User
|
||||||
|
|
||||||
|
|
||||||
def _files_hash(hash_alg, dirpath):
|
|
||||||
"""List of (filepath, md5) for a directory"""
|
|
||||||
# TODO: do it async or multithreaded
|
|
||||||
files = [f for f in dirpath.iterdir() if f.is_file()]
|
|
||||||
return [(f.parts[-1], hash_alg(f)) for f in files]
|
|
||||||
|
|
||||||
|
|
||||||
files_md5 = functools.partial(_files_hash, md5)
|
|
||||||
files_sha256 = functools.partial(_files_hash, sha256)
|
|
||||||
|
|
||||||
|
|
||||||
class Importer:
|
class Importer:
|
||||||
def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
|
def __init__(self, projects, package, repodir):
|
||||||
# The idea is to create each commit in order, and draw the
|
# The idea is to create each commit in order, and draw the
|
||||||
# same graph described by the revisions timeline. For that we
|
# same graph described by the revisions timeline. For that we
|
||||||
# need first to fetch all the revisions and sort them
|
# need first to fetch all the revisions and sort them
|
||||||
@ -49,8 +36,6 @@ class Importer:
|
|||||||
# (could be moved), and "factory" is not the root.
|
# (could be moved), and "factory" is not the root.
|
||||||
|
|
||||||
self.package = package
|
self.package = package
|
||||||
self.search_ancestor = search_ancestor
|
|
||||||
self.rebase_devel = rebase_devel
|
|
||||||
|
|
||||||
self.obs = OBS()
|
self.obs = OBS()
|
||||||
self.git = Git(
|
self.git = Git(
|
||||||
@ -62,8 +47,6 @@ class Importer:
|
|||||||
self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
|
self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
|
||||||
self.gc_interval = 200
|
self.gc_interval = 200
|
||||||
|
|
||||||
self.history = History(self.obs, self.package)
|
|
||||||
|
|
||||||
# Add the "devel" project
|
# Add the "devel" project
|
||||||
(project, branch, api_url) = projects[0]
|
(project, branch, api_url) = projects[0]
|
||||||
assert project == "openSUSE:Factory"
|
assert project == "openSUSE:Factory"
|
||||||
@ -140,7 +123,7 @@ class Importer:
|
|||||||
return
|
return
|
||||||
latest = DBRevision.latest_revision(db, project, package)
|
latest = DBRevision.latest_revision(db, project, package)
|
||||||
for r in root.findall("revision"):
|
for r in root.findall("revision"):
|
||||||
rev = OBSRevision(self.obs, self, project, package).parse(r)
|
rev = OBSRevision(self.obs, project, package).parse(r)
|
||||||
if not latest or rev.rev > latest.rev:
|
if not latest or rev.rev > latest.rev:
|
||||||
dbrev = DBRevision.import_obs_rev(db, rev)
|
dbrev = DBRevision.import_obs_rev(db, rev)
|
||||||
try:
|
try:
|
||||||
@ -289,7 +272,7 @@ class Importer:
|
|||||||
"""While walking the tree, record the commits to do one after the other. These
|
"""While walking the tree, record the commits to do one after the other. These
|
||||||
FlatNodes are in the end in the flats array."""
|
FlatNodes are in the end in the flats array."""
|
||||||
|
|
||||||
def __init__(self, rebase_devel) -> None:
|
def __init__(self, rebase_devel=False) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.flats = []
|
self.flats = []
|
||||||
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
# the rebase_devel won't work as such as rebasing the branch needs an explicit action
|
||||||
@ -325,7 +308,7 @@ class Importer:
|
|||||||
|
|
||||||
self.last_merge = node
|
self.last_merge = node
|
||||||
|
|
||||||
ftw = FlatTreeWalker(self.rebase_devel)
|
ftw = FlatTreeWalker()
|
||||||
tree.walk(ftw)
|
tree.walk(ftw)
|
||||||
branch_state = {"factory": None, "devel": None}
|
branch_state = {"factory": None, "devel": None}
|
||||||
state_data = dict()
|
state_data = dict()
|
||||||
@ -452,216 +435,3 @@ class Importer:
|
|||||||
self.obs.request(number).import_into_db(db)
|
self.obs.request(number).import_into_db(db)
|
||||||
|
|
||||||
db.conn.commit()
|
db.conn.commit()
|
||||||
|
|
||||||
def import_new_revision_with_request(self, revision, request):
|
|
||||||
"""Create a new branch as a result of a merge"""
|
|
||||||
|
|
||||||
submitted_revision = self.history.find_revision(
|
|
||||||
request.source, request.revisionid, revision.time
|
|
||||||
)
|
|
||||||
if not submitted_revision:
|
|
||||||
logging.warning(f"Request {request} does not connect to a known revision")
|
|
||||||
return False
|
|
||||||
|
|
||||||
if not submitted_revision.commit:
|
|
||||||
# If the revision appointed by the request is not part of
|
|
||||||
# the git history, we can have an ordering problem. One
|
|
||||||
# example is "premake4".
|
|
||||||
self.import_revision(submitted_revision)
|
|
||||||
|
|
||||||
assert submitted_revision.commit is not None
|
|
||||||
|
|
||||||
project = revision.project
|
|
||||||
branch, _ = self.projects_info[project]
|
|
||||||
|
|
||||||
# TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
|
|
||||||
self.git.branch(branch, submitted_revision.commit)
|
|
||||||
self.git.clean()
|
|
||||||
self.git.checkout(branch)
|
|
||||||
|
|
||||||
logging.info(f"Create new branch based on {submitted_revision.commit}")
|
|
||||||
revision.commit = submitted_revision.commit
|
|
||||||
|
|
||||||
def _rebase_branch_history(self, project, revision):
|
|
||||||
branch, _ = self.projects_info[project]
|
|
||||||
history = self.history[project]
|
|
||||||
revision_index = history.index(revision)
|
|
||||||
for index in range(revision_index + 1, len(history)):
|
|
||||||
revision = history[index]
|
|
||||||
# We are done when we have one non-committed revision
|
|
||||||
if not revision.commit:
|
|
||||||
return
|
|
||||||
logging.info(f"Rebasing {revision} from {branch}")
|
|
||||||
revision.commit = None
|
|
||||||
self.import_revision(revision)
|
|
||||||
|
|
||||||
def import_revision_with_request(self, revision, request):
|
|
||||||
"""Import a single revision via a merge"""
|
|
||||||
|
|
||||||
submitted_revision = self.history.find_revision(
|
|
||||||
request.source, request.revisionid, revision.time
|
|
||||||
)
|
|
||||||
if not submitted_revision:
|
|
||||||
logging.warning(f"Request {request} does not connect to a known revision")
|
|
||||||
return False
|
|
||||||
assert submitted_revision.commit is not None
|
|
||||||
|
|
||||||
# TODO: detect a revision, case in point
|
|
||||||
# Base:System/bash/284 -> rq683701 -> accept O:F/151
|
|
||||||
# -> autocommit Base:System/bash/285
|
|
||||||
# Revert lead to openSUSE:Factory/bash/152
|
|
||||||
# Base:System/286 restored the reverted code in devel project
|
|
||||||
# rq684575 was created and accepted as O:F/153
|
|
||||||
# But the 284-285 and the 285-286 changeset is seen as empty
|
|
||||||
# as the revert was never in Base:System, so the
|
|
||||||
# submitted_revision of 684575 has no commit
|
|
||||||
if submitted_revision.commit == "EMPTY":
|
|
||||||
logging.warning("Empty commit submitted?!")
|
|
||||||
return False
|
|
||||||
|
|
||||||
message = (
|
|
||||||
f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
|
|
||||||
)
|
|
||||||
commit = self.git.merge(
|
|
||||||
# TODO: revision.userid or request.creator?
|
|
||||||
f"OBS User {revision.userid}",
|
|
||||||
"null@suse.de",
|
|
||||||
revision.time,
|
|
||||||
message,
|
|
||||||
submitted_revision.commit,
|
|
||||||
)
|
|
||||||
|
|
||||||
if commit == "EMPTY":
|
|
||||||
logging.warning("Empty merge. Ignoring the revision and the request")
|
|
||||||
self.git.merge_abort()
|
|
||||||
revision.commit = commit
|
|
||||||
return False
|
|
||||||
|
|
||||||
if commit == "CONFLICT":
|
|
||||||
logging.info("Merge conflict. Downloading revision")
|
|
||||||
self.download(revision)
|
|
||||||
message = f"CONFLICT {message}"
|
|
||||||
commit = self.git.merge(
|
|
||||||
f"OBS User {revision.userid}",
|
|
||||||
"null@suse.de",
|
|
||||||
revision.time,
|
|
||||||
message,
|
|
||||||
submitted_revision.commit,
|
|
||||||
merged=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert commit and commit != "CONFLICT"
|
|
||||||
logging.info(f"Merge with {submitted_revision.commit} into {commit}")
|
|
||||||
revision.commit = commit
|
|
||||||
|
|
||||||
# TODO: There are more checks to do, like for example, the
|
|
||||||
# last commit into the non-devel branch should be a merge from
|
|
||||||
# the devel branch
|
|
||||||
if self.rebase_devel:
|
|
||||||
branch, _ = self.projects_info.get(request.source, (None, None))
|
|
||||||
if branch == "devel":
|
|
||||||
self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
|
|
||||||
self._rebase_branch_history(request.source, submitted_revision)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def matching_request(self, revision):
|
|
||||||
request = self.obs.request(revision.requestid)
|
|
||||||
if not request:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# to be handled by the caller
|
|
||||||
if request.type() != "submit":
|
|
||||||
return request
|
|
||||||
|
|
||||||
if request.source not in self.projects_info:
|
|
||||||
logging.info("Request from a non exported project")
|
|
||||||
return None
|
|
||||||
|
|
||||||
if request.target != revision.project:
|
|
||||||
# This seems to happen when the devel project gets
|
|
||||||
# reinitialized (for example, SR#943593 in 7zip, or
|
|
||||||
# SR#437901 in ColorFull)
|
|
||||||
logging.info("Request target different from current project")
|
|
||||||
return None
|
|
||||||
|
|
||||||
if request.source == request.target:
|
|
||||||
# this is not a merge, but a different way to do a
|
|
||||||
# contribution to the (devel) project - see bindfs's rev 1
|
|
||||||
logging.info("Request within the same project")
|
|
||||||
return None
|
|
||||||
|
|
||||||
return request
|
|
||||||
|
|
||||||
def import_revision(self, revision):
|
|
||||||
"""Import a single revision into git"""
|
|
||||||
project = revision.project
|
|
||||||
branch, api_url = self.projects_info[project]
|
|
||||||
|
|
||||||
logging.info(f"Importing [{revision}] to {branch}")
|
|
||||||
|
|
||||||
self.obs.change_url(api_url)
|
|
||||||
|
|
||||||
# Populate linkrev and replace srcmd5 from the linked
|
|
||||||
# revision. If the expansion fails, the revision will be ignored
|
|
||||||
# and not imported.
|
|
||||||
if not revision.check_expanded():
|
|
||||||
logging.warning(f"Broken revision")
|
|
||||||
revision.ignored = True
|
|
||||||
return
|
|
||||||
|
|
||||||
# When doing a SR, we see also a revision in the origin
|
|
||||||
# project with the outgoing request, but without changes in
|
|
||||||
# the project. We can ignore them.
|
|
||||||
#
|
|
||||||
# If there is a request ID, it will be filtered out later,
|
|
||||||
# when the target project is different from itself.
|
|
||||||
if revision.userid == "autobuild" and not revision.requestid:
|
|
||||||
logging.info("Ignoring autocommit")
|
|
||||||
revision.ignored = True
|
|
||||||
return
|
|
||||||
|
|
||||||
if revision.userid == "buildservice-autocommit":
|
|
||||||
logging.info("Ignoring autocommit")
|
|
||||||
revision.ignored = True
|
|
||||||
return
|
|
||||||
|
|
||||||
# Create the reference if the branch is new. If so return
|
|
||||||
# True.
|
|
||||||
new_branch = self.git.checkout(branch)
|
|
||||||
|
|
||||||
if revision.requestid:
|
|
||||||
request = self.matching_request(revision)
|
|
||||||
if request:
|
|
||||||
if request.type() == "delete":
|
|
||||||
# TODO: after this comes a restore, this should be collapsed
|
|
||||||
# before even hitting git
|
|
||||||
logging.info("Delete request ignored")
|
|
||||||
revision.ignored = True
|
|
||||||
return
|
|
||||||
|
|
||||||
logging.debug(f"Found matching request: #{revision.project} #{request}")
|
|
||||||
if new_branch:
|
|
||||||
self.import_new_revision_with_request(revision, request)
|
|
||||||
return
|
|
||||||
if self.import_revision_with_request(revision, request):
|
|
||||||
return
|
|
||||||
|
|
||||||
# Import revision as a single commit (without merging)
|
|
||||||
self.download(revision)
|
|
||||||
|
|
||||||
if new_branch or self.git.is_dirty():
|
|
||||||
commit = self.git.commit(
|
|
||||||
f"OBS User {revision.userid}",
|
|
||||||
"null@suse.de",
|
|
||||||
revision.time,
|
|
||||||
# TODO: Normalize better the commit message
|
|
||||||
f"{revision.comment}\n\n{revision}",
|
|
||||||
# Create an empty commit only if it is a new branch
|
|
||||||
allow_empty=new_branch,
|
|
||||||
)
|
|
||||||
revision.commit = commit
|
|
||||||
logging.info(f"Commit {commit}")
|
|
||||||
else:
|
|
||||||
logging.info("Skip empty commit")
|
|
||||||
revision.ignored = True
|
|
||||||
|
@ -1,14 +1,12 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
|
|
||||||
class OBSRevision:
|
class OBSRevision:
|
||||||
def __init__(self, obs, history, project, package):
|
def __init__(self, obs, project, package):
|
||||||
self.obs = obs
|
self.obs = obs
|
||||||
self.history = history
|
|
||||||
self.project = project
|
self.project = project
|
||||||
self.package = package
|
self.package = package
|
||||||
|
|
||||||
@ -72,46 +70,3 @@ class OBSRevision:
|
|||||||
logging.debug("No _link for the revision")
|
logging.debug("No _link for the revision")
|
||||||
return None
|
return None
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
def check_link(self):
|
|
||||||
"""Add 'linkrev' attribute into the revision. Returns False if the link is invalid"""
|
|
||||||
try:
|
|
||||||
root = self.read_link()
|
|
||||||
if root is None:
|
|
||||||
return True
|
|
||||||
target_project = root.get("project")
|
|
||||||
except ET.ParseError:
|
|
||||||
logging.error(
|
|
||||||
f"_link can't be parsed [{self.project}/{self.package} rev={self.unexpanded_srcmd5}]"
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
|
|
||||||
rev = self.history.find_last_rev_after_time(target_project, self.time)
|
|
||||||
if rev:
|
|
||||||
logging.debug(f"Linkrev found: {rev}")
|
|
||||||
self.linkrev = rev.srcmd5
|
|
||||||
return True
|
|
||||||
|
|
||||||
def check_expanded(self):
|
|
||||||
# Even if it's not a link we still need to check the expanded
|
|
||||||
# srcmd5 as it's possibly used in submit requests
|
|
||||||
if not self.check_link():
|
|
||||||
return False
|
|
||||||
|
|
||||||
# If there is a "linkrev", "rev" is ignored
|
|
||||||
params = {"rev": self.srcmd5, "expand": "1"}
|
|
||||||
if self.linkrev:
|
|
||||||
params["linkrev"] = self.linkrev
|
|
||||||
|
|
||||||
try:
|
|
||||||
root = self.obs._xml(f"source/{self.project}/{self.package}", **params)
|
|
||||||
except HTTPError as e:
|
|
||||||
if e.code == 400:
|
|
||||||
logging.error(
|
|
||||||
f"Package [{self.project}/{self.package} {params}] can't be expanded: {e}"
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
raise e
|
|
||||||
|
|
||||||
self.srcmd5 = root.get("srcmd5")
|
|
||||||
return True
|
|
||||||
|
@ -3,7 +3,6 @@ import xml.etree.ElementTree as ET
|
|||||||
|
|
||||||
from lib.db import DB
|
from lib.db import DB
|
||||||
from lib.db_revision import DBRevision
|
from lib.db_revision import DBRevision
|
||||||
from lib.history import History
|
|
||||||
from lib.obs import OBS
|
from lib.obs import OBS
|
||||||
from lib.obs_revision import OBSRevision
|
from lib.obs_revision import OBSRevision
|
||||||
|
|
||||||
@ -12,10 +11,9 @@ class TestDBMethods(unittest.TestCase):
|
|||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.db = DB(section="test")
|
self.db = DB(section="test")
|
||||||
self.obs = OBS()
|
self.obs = OBS()
|
||||||
self.history = History(self.obs, "xz")
|
|
||||||
|
|
||||||
def test_import(self):
|
def test_import(self):
|
||||||
test_rev = OBSRevision(self.obs, self.history, "openSUSE:Factory", "xz")
|
test_rev = OBSRevision(self.obs, "openSUSE:Factory", "xz")
|
||||||
test_rev.parse(
|
test_rev.parse(
|
||||||
ET.fromstring(
|
ET.fromstring(
|
||||||
"""<revision rev="70" vrev="1">
|
"""<revision rev="70" vrev="1">
|
||||||
|
Loading…
Reference in New Issue
Block a user