"""Walk the node tree and record the parents, then reverse the tree so we
can have the exact order in which to create git commits."""

import functools
import logging
import xml.etree.ElementTree as ET

from lib.binary import is_binary_or_large
from lib.db import DB
from lib.db_revision import DBRevision
from lib.git import Git
from lib.history import History
from lib.obs import OBS
from lib.obs_revision import OBSRevision
from lib.proxy_sha256 import ProxySHA256, md5, sha256
from lib.tree_builder import AbstractWalker, TreeBuilder, TreeNode
from lib.user import User


def _files_hash(hash_alg, dirpath):
    """List of (filename, hash) pairs for the files in a directory"""
    # TODO: do it async or multithreaded
    files = [f for f in dirpath.iterdir() if f.is_file()]
    return [(f.parts[-1], hash_alg(f)) for f in files]


files_md5 = functools.partial(_files_hash, md5)
files_sha256 = functools.partial(_files_hash, sha256)


class Importer:
    def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
        # The idea is to create each commit in order, and draw the
        # same graph described by the revisions timeline. For that we
        # need first to fetch all the revisions and sort them
        # linearly, based on the timestamp.
        #
        # After that we recreate the commits, and if one revision is a
        # request that contains a target inside the projects in the
        # "history", we create a merge commit.
        #
        # Optionally, if a flag is set, we will try to find a common
        # "Initial commit" from a reference branch (the first one in
        # "projects"), which is safe to assume to be "openSUSE:Factory".
        # This is not always a good idea. For example, in a normal
        # situation the "devel" project history is older than
        # "factory", and we can root the tree on it. But for some
        # other projects we partially lost the "devel" project history
        # (it could have been moved), and "factory" is not the root.

        self.package = package
        self.search_ancestor = search_ancestor
        self.rebase_devel = rebase_devel

        self.obs = OBS()
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)

        self.history = History(self.obs, self.package)

        # Add the "devel" project
        (project, branch, api_url) = projects[0]
        assert project == "openSUSE:Factory"
        self.obs.change_url(api_url)
        devel_project = self.obs.devel_project(project, package)
        if devel_project:
            self.projects = [(devel_project, "devel", api_url)] + projects
        else:
            self.projects = projects

        # Associate the branch and api_url information per project
        self.projects_info = {
            project: (branch, api_url) for (project, branch, api_url) in self.projects
        }

    def download(self, revision):
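        """Sync the git working tree with an OBS revision: download text
        files, add LFS stubs for binary or large ones, and drop files that
        are gone from OBS"""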
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name not in (".gitattributes",)
        }

        # Overwrite ".gitattributes" with the default LFS rules
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file from OBS unless it is a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file easily creates 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue
            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                if (name, size, file_md5) not in git_files:
                    print(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                    self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            print(f"Remove {name}")
            self.git.remove(name)

    def update_db_package(self, db, project, package):
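        """Import into the DB all revisions of (project, package) newer than
        the latest one already stored"""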
        root = self.obs._history(project, package)
        if root is None:
            return
        latest = DBRevision.latest_revision(db, project, package)
        for r in root.findall("revision"):
            rev = OBSRevision(self.obs, self, project, package).parse(r)
            if not latest or rev.rev > latest.rev:
                dbrev = DBRevision.import_obs_rev(db, rev)
                try:
                    root = rev.read_link()
                except ET.ParseError:
                    dbrev.set_broken(db)
                    continue
                if root is not None:
                    tprj = root.get("project") or project
                    tpkg = root.get("package") or package
                    dbrev.links_to(db, tprj, tpkg)

    def find_linked_revs(self, db):
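        """Record in linked_revs the linked revision of every link that has
        no entry there yet"""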
        with db.cursor() as cur:
            cur.execute(
                """SELECT * from revisions WHERE id in (SELECT l.revision_id FROM links l
                   LEFT JOIN linked_revs lrevs ON lrevs.revision_id=l.revision_id
                   WHERE lrevs.id IS NULL) and broken is FALSE;"""
            )
            for row in cur.fetchall():
                rev = DBRevision(row)
                linked_rev = rev.linked_rev(db)
                if not linked_rev:
                    logging.debug(f"No link {rev}")
                    continue
                cur.execute(
                    """INSERT INTO linked_revs (revision_id, linked_id)
                       VALUES (%s,%s)""",
                    (rev.dbid, linked_rev.dbid),
                )

    def fetch_all_linked_packages(self, db, project, package):
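        """Update the DB for every package that revisions of (project,
        package) link to"""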
        with db.cursor() as cur:
            cur.execute(
                """SELECT DISTINCT l.project, l.package from links l JOIN revisions r
                   on r.id=l.revision_id WHERE r.project=%s AND r.package=%s""",
                (project, package),
            )
            for row in cur.fetchall():
                (lproject, lpackage) = row
                self.update_db_package(db, lproject, lpackage)

    def find_fake_revisions(self, db):
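        """Create fake revisions for all linked revisions not considered
        yet"""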
        with db.cursor() as cur:
            cur.execute(
                "SELECT * from revisions WHERE id in (SELECT linked_id from linked_revs WHERE considered=FALSE)"
            )
            for row in cur.fetchall():
                self._find_fake_revision(db, DBRevision(row))

    def _find_fake_revision(self, db, rev):
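        """Insert a fake "buildservice-autocommit" revision into the linking
        package, so the change behind `rev` shows up in its history, and mark
        the link as considered"""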
        prev = rev.previous_commit(db)
        if not prev:
            with db.cursor() as cur:
                cur.execute(
                    "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                    (rev.dbid,),
                )
            return
        with db.cursor() as cur:
            cur.execute(
                """SELECT * FROM revisions WHERE id IN
                   (SELECT revision_id from linked_revs WHERE linked_id=%s)
                   AND commit_time <= %s ORDER BY commit_time""",
                (prev.dbid, rev.commit_time),
            )
            last_linked = None
            for linked in cur.fetchall():
                linked = DBRevision(linked)
                nextrev = linked.next_commit(db)
                if nextrev and nextrev.commit_time < rev.commit_time:
                    continue
                last_linked = linked
            cur.execute(
                "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                (rev.dbid,),
            )
        if not last_linked:
            return

        with db.cursor() as cur:
            linked = last_linked
            cur.execute(
                "SELECT 1 FROM fake_revs where revision_id=%s AND linked_id=%s",
                (rev.dbid, linked.dbid),
            )
            if cur.fetchone():
                cur.execute(
                    "UPDATE linked_revs SET considered=TRUE where linked_id=%s",
                    (rev.dbid,),
                )
                return
            fake_rev = linked.rev + rev.rev / 1000.0
            comment = f"Updating link to change in {rev.project}/{rev.package} revision {rev.rev}"
            cur.execute(
                """INSERT INTO revisions (project,package,rev,unexpanded_srcmd5,
                   commit_time, userid, comment) VALUES(%s,%s,%s,%s,%s,%s,%s) RETURNING id""",
                (
                    linked.project,
                    linked.package,
                    fake_rev,
                    linked.unexpanded_srcmd5,
                    rev.commit_time,
                    "buildservice-autocommit",
                    comment,
                ),
            )
            new_id = cur.fetchone()[0]
            cur.execute(
                """INSERT INTO linked_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (new_id, rev.dbid),
            )
            cur.execute(
                """INSERT INTO fake_revs (revision_id, linked_id) VALUES (%s,%s)""",
                (rev.dbid, linked.dbid),
            )

    def revisions_without_files(self, db):
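        """Return all non-broken revisions whose file list was not expanded
        yet"""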
        with db.cursor() as cur:
            cur.execute(
                "SELECT * FROM revisions WHERE broken=FALSE AND expanded_srcmd5 IS NULL"
            )
            return [DBRevision(row) for row in cur.fetchall()]

    def export_as_git(self):
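        """Walk the node tree and record the parents, then reverse the flat
        list so the git commits are created in the exact right order"""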
        db = DB()
        tree = TreeBuilder(db).build(self.package)

        class FlatNode:
            def __init__(self, branch, commit, parent1=None, parent2=None) -> None:
                self.branch = branch
                self.commit = commit
                self.parent1 = parent1
                self.parent2 = parent2

            def __str__(self) -> str:
                p1_str = ""
                if self.parent1:
                    p1_str = f" p1:{self.parent1.short_string()}"
                p2_str = ""
                if self.parent2:
                    p2_str = f" p2:{self.parent2.short_string()}"
                return f"{self.branch} c:{self.commit.short_string()}{p1_str}{p2_str}"

        class FlatTreeWalker(AbstractWalker):
            def __init__(self) -> None:
                super().__init__()
                self.flats = []
                # remember the last merge point, so we know which parent to
                # give the root of the sources
                self.last_merge = None

            def add(self, branch, commit, parent1=None, parent2=None):
                self.flats.append(FlatNode(branch, commit, parent1, parent2))

            def handle_source_node(self, node) -> None:
                # the trailing "and False" keeps this branch disabled
                if node.parent and node.parent.merged_into and False:
                    self.add("devel", node.revision, node.parent.merged_into.revision)
                    return
                if node.parent:
                    self.add("devel", node.revision, node.parent.revision)
                elif self.last_merge:
                    self.add("devel", node.revision, self.last_merge.parent.revision)

            def call(self, node, is_source) -> None:
                if is_source:
                    self.handle_source_node(node)
                    return
                if not node.parent:
                    self.add("factory", node.revision)
                    return
                if not node.merged:
                    self.add("factory", node.revision, node.parent.revision)
                    return
                self.add(
                    "factory", node.revision, node.parent.revision, node.merged.revision
                )

                self.last_merge = node

        ftw = FlatTreeWalker()
        tree.walk(ftw)
        for flat in reversed(ftw.flats):
            self.commit_flat(flat)

    def commit_flat(self, flat):
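        """Create the git commit for a flat node on its branch, wiring up the
        recorded parent commits"""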
        parents = []
        self.git.checkout(flat.branch)
        if flat.parent1:
            parents.append(flat.parent1.git_commit)
        if flat.parent2:
            parents.append(flat.parent2.git_commit)

        commit = self.git.commit(
            f"OBS User {flat.commit.userid}",
            "null@suse.de",
            flat.commit.commit_time,
            # TODO: Normalize the commit message better
            f"{flat.commit.comment}\n\n{flat.commit}",
            allow_empty=True,
            parents=parents,
        )
        flat.commit.git_commit = commit

    def import_into_db(self):
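        """Fetch revisions, linked packages, users, file lists and requests
        from OBS and store them in the DB"""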
        db = DB()
        for project, _, api_url in self.projects:
            self.obs.change_url(api_url)
            self.update_db_package(db, project, self.package)
            self.fetch_all_linked_packages(db, project, self.package)
        # all remaining, no filtering here
        self.find_linked_revs(db)

        missing_users = User.missing_users(db)
        for userid in missing_users:
            missing_user = self.obs.user(userid)
            if missing_user:
                missing_user.import_into_db(db)

        self.find_fake_revisions(db)
        for rev in self.revisions_without_files(db):
            with db.cursor() as cur:
                cur.execute(
                    """SELECT unexpanded_srcmd5 from revisions WHERE
                       id=(SELECT linked_id FROM linked_revs WHERE revision_id=%s)""",
                    (rev.dbid,),
                )
                linked_rev = cur.fetchone()
                if linked_rev:
                    linked_rev = linked_rev[0]
                dir_list = self.obs.list(
                    rev.project, rev.package, rev.unexpanded_srcmd5, linked_rev
                )
                if dir_list:
                    rev.import_dir_list(db, dir_list)
                    files_hash = rev.calculate_files_hash(db)
                    with db.cursor() as cur:
                        cur.execute(
                            "UPDATE revisions SET files_hash=%s WHERE id=%s",
                            (files_hash, rev.dbid),
                        )
                else:
                    rev.set_broken(db)

        for number in DBRevision.requests_to_fetch(db):
            self.obs.request(number).import_into_db(db)

        db.conn.commit()

    def import_all_revisions(self, gc):
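        """Import all revisions into git, running git gc every `gc`
        revisions"""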
        # Fetch all the requests and sort them. Ideally we should
        # build the graph here, to avoid new commits before the merge.
        # For now we will sort them and invalidate the commits if
        # "rebase_devel" is set.
        self.history.fetch_all_revisions(self.projects)
        revisions = self.history.sort_all_revisions()

        logging.debug(f"Selected import order for {self.package}")
        for revision in revisions:
            logging.debug(revision)

        gc_cnt = gc
        for revision in revisions:
            gc_cnt -= 1
            if gc_cnt <= 0 and gc:
                self.git.gc()
                gc_cnt = gc
            self.import_revision(revision)

    def import_new_revision_with_request(self, revision, request):
        """Create a new branch as a result of a merge"""

        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False

        if not submitted_revision.commit:
            # If the revision appointed by the request is not part of
            # the git history, we can have an ordering problem. One
            # example is "premake4".
            self.import_revision(submitted_revision)

        assert submitted_revision.commit is not None

        project = revision.project
        branch, _ = self.projects_info[project]

        # TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
        self.git.branch(branch, submitted_revision.commit)
        self.git.clean()
        self.git.checkout(branch)

        logging.info(f"Create new branch based on {submitted_revision.commit}")
        revision.commit = submitted_revision.commit

    def _rebase_branch_history(self, project, revision):
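        """Re-import all already committed revisions that follow `revision`
        in the project history"""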
        branch, _ = self.projects_info[project]
        history = self.history[project]
        revision_index = history.index(revision)
        for index in range(revision_index + 1, len(history)):
            revision = history[index]
            # We are done when we find the first non-committed revision
            if not revision.commit:
                return
            logging.info(f"Rebasing {revision} from {branch}")
            revision.commit = None
            self.import_revision(revision)

    def import_revision_with_request(self, revision, request):
        """Import a single revision via a merge"""

        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False
        assert submitted_revision.commit is not None

        # TODO: detect such a revision; case in point:
        # Base:System/bash/284 -> rq683701 -> accept O:F/151
        #   -> autocommit Base:System/bash/285
        # A revert led to openSUSE:Factory/bash/152.
        # Base:System/286 restored the reverted code in the devel project,
        # rq684575 was created and accepted as O:F/153.
        # But the 284-285 and the 285-286 changesets are seen as empty,
        # as the revert was never in Base:System, so the
        # submitted_revision of 684575 has no commit
        if submitted_revision.commit == "EMPTY":
            logging.warning("Empty commit submitted?!")
            return False

        message = (
            f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
        )
        commit = self.git.merge(
            # TODO: revision.userid or request.creator?
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            message,
            submitted_revision.commit,
        )

        if commit == "EMPTY":
            logging.warning("Empty merge. Ignoring the revision and the request")
            self.git.merge_abort()
            revision.commit = commit
            return False

        if commit == "CONFLICT":
            logging.info("Merge conflict. Downloading revision")
            self.download(revision)
            message = f"CONFLICT {message}"
            commit = self.git.merge(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                message,
                submitted_revision.commit,
                merged=True,
            )

        assert commit and commit != "CONFLICT"
        logging.info(f"Merge with {submitted_revision.commit} into {commit}")
        revision.commit = commit

        # TODO: There are more checks to do, like for example, the
        # last commit into the non-devel branch should be a merge from
        # the devel branch
        if self.rebase_devel:
            branch, _ = self.projects_info.get(request.source, (None, None))
            if branch == "devel":
                self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
                self._rebase_branch_history(request.source, submitted_revision)

        return True

    def matching_request(self, revision):
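        """Return the request behind a revision when the caller should handle
        it: non-submit requests are returned as-is, submit requests only when
        they represent a merge from another exported project"""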
        request = self.obs.request(revision.requestid)
        if not request:
            return None

        # to be handled by the caller
        if request.type() != "submit":
            return request

        if request.source not in self.projects_info:
            logging.info("Request from a non-exported project")
            return None

        if request.target != revision.project:
            # This seems to happen when the devel project gets
            # reinitialized (for example, SR#943593 in 7zip, or
            # SR#437901 in ColorFull)
            logging.info("Request target different from current project")
            return None

        if request.source == request.target:
            # this is not a merge, but a different way to do a
            # contribution to the (devel) project - see bindfs's rev 1
            logging.info("Request within the same project")
            return None

        return request

    def import_revision(self, revision):
        """Import a single revision into git"""
        project = revision.project
        branch, api_url = self.projects_info[project]

        logging.info(f"Importing [{revision}] to {branch}")

        self.obs.change_url(api_url)

        # Populate linkrev and replace srcmd5 from the linked
        # revision. If the expansion fails, the revision will be ignored
        # and not imported.
        if not revision.check_expanded():
            logging.warning(f"Broken revision {revision}")
            revision.ignored = True
            return

        # When doing an SR, we also see a revision in the origin
        # project with the outgoing request, but without changes in
        # the project. We can ignore them.
        #
        # If there is a request ID, it will be filtered out later,
        # when the target project is different from itself.
        if revision.userid == "autobuild" and not revision.requestid:
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        if revision.userid == "buildservice-autocommit":
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        # Create the reference if the branch is new. In that case
        # checkout() returns True.
        new_branch = self.git.checkout(branch)

        if revision.requestid:
            request = self.matching_request(revision)
            if request:
                if request.type() == "delete":
                    # TODO: after this comes a restore, this should be collapsed
                    # before even hitting git
                    logging.info("Delete request ignored")
                    revision.ignored = True
                    return

                logging.debug(f"Found matching request: #{revision.project} #{request}")
                if new_branch:
                    self.import_new_revision_with_request(revision, request)
                    return
                if self.import_revision_with_request(revision, request):
                    return

        # Import revision as a single commit (without merging)
        self.download(revision)

        if new_branch or self.git.is_dirty():
            commit = self.git.commit(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                # TODO: Normalize the commit message better
                f"{revision.comment}\n\n{revision}",
                # Create an empty commit only if it is a new branch
                allow_empty=new_branch,
            )
            revision.commit = commit
            logging.info(f"Commit {commit}")
        else:
            logging.info("Skip empty commit")
            revision.ignored = True
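

# A minimal usage sketch, not part of the importer itself: the package name
# and repository path below are illustrative assumptions, and running it
# requires a reachable OBS API plus a configured DB.
if __name__ == "__main__":
    importer = Importer(
        [("openSUSE:Factory", "factory", "https://api.opensuse.org")],
        "bash",  # hypothetical package to import
        "repos/bash",  # hypothetical target git repository
        search_ancestor=False,
        rebase_devel=False,
    )
    # Mirror the OBS history into the DB, then replay it as git commits.
    importer.import_into_db()
    importer.export_as_git()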