forked from adamm/git-importer
421 lines
16 KiB
Python
421 lines
16 KiB
Python
import functools
|
|
import logging
|
|
|
|
from lib.binary import is_binary_or_large
|
|
from lib.db import DB
|
|
from lib.db_revision import DBRevision
|
|
from lib.git import Git
|
|
from lib.history import History
|
|
from lib.obs import OBS
|
|
from lib.obs_revision import OBSRevision
|
|
from lib.proxy_sha256 import ProxySHA256, md5, sha256
|
|
from lib.request import Request
|
|
import xml.etree.ElementTree as ET
|
|
from lib.user import User
|
|
|
|
|
|
def _files_hash(hash_alg, dirpath):
|
|
"""List of (filepath, md5) for a directory"""
|
|
# TODO: do it async or multythread
|
|
files = [f for f in dirpath.iterdir() if f.is_file()]
|
|
return [(f.parts[-1], hash_alg(f)) for f in files]
|
|
|
|
|
|
# Directory listers with the hash function already bound: each takes a
# directory path and returns the (file name, digest) pairs computed by
# _files_hash with md5 and sha256 respectively.
files_md5, files_sha256 = (
    functools.partial(_files_hash, hash_alg) for hash_alg in (md5, sha256)
)
|
|
|
|
|
|
class Importer:
    """Recreate the OBS revision history of one package as git commits.

    The idea is to create each commit in order, and draw the same graph
    described by the revisions timeline. For that we need first to fetch
    all the revisions and sort them linearly, based on the timestamp.

    After that we recreate the commits, and if one revision is a request
    that contains a target inside the projects in the "history", we
    create a merge commit.

    Optionally, if a flag is set, we will try to find a common "Initial
    commit" from a reference branch (the first one in "projects", that is
    safe to assume to be "openSUSE:Factory"). This is not always a good
    idea. For example, in a normal situation the "devel" project history
    is older than "factory", and we can root the tree on it. But for some
    other projects we lost partially the "devel" history project (could
    be moved), and "factory" is not the root.
    """

    def __init__(self, projects, package, repodir, search_ancestor, rebase_devel):
        """Create the OBS/git helpers and work out the list of projects.

        Args:
            projects: list of ``(project, branch, api_url)`` tuples. The
                first entry must be "openSUSE:Factory".
            package: name of the package to import.
            repodir: passed to ``Git`` as the repository location.
            search_ancestor: stored on the instance; not read by any
                method of this class.
            rebase_devel: when true, an accepted merge from the "devel"
                branch moves that branch to the merge commit and the
                following devel revisions are imported again.

        Raises:
            AssertionError: if the first project is not "openSUSE:Factory".
        """
        self.package = package
        self.search_ancestor = search_ancestor
        self.rebase_devel = rebase_devel

        self.obs = OBS()
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)

        self.history = History(self.obs, self.package)

        # Add the "devel" project in front of the configured ones
        (project, branch, api_url) = projects[0]
        assert project == "openSUSE:Factory"
        self.obs.change_url(api_url)
        devel_project = self.obs.devel_project(project, package)
        if devel_project:
            self.projects = [(devel_project, "devel", api_url)] + projects
        else:
            self.projects = projects

        # Associate the branch and api_url information per project
        self.projects_info = {
            project: (branch, api_url) for (project, branch, api_url) in self.projects
        }

    def download(self, revision):
        """Make the git working tree match the file list of ``revision``.

        Files considered binary or large are registered through
        ``git.add_lfs`` using the sha256 obtained from the proxy; all the
        other files are downloaded when their (name, size, md5) is not
        already present in the working tree, and then added. Files that
        exist in the working tree but not in OBS are removed.

        Raises:
            Exception: if the md5 of a downloaded file does not match the
                md5 announced by OBS.
        """
        obs_files = self.obs.files(revision.project, revision.package, revision.srcmd5)
        # ".gitattributes" is managed by us and is not part of the OBS
        # content. Compare by equality: a membership test against the
        # bare string would be a substring test and would also hide
        # files named e.g. "git" or "attributes".
        git_files = {
            (f.name, f.stat().st_size, md5(f))
            for f in self.git.path.iterdir()
            if f.is_file() and f.name != ".gitattributes"
        }

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        # Download each file in OBS if it is not a binary (or large)
        # file
        for (name, size, file_md5) in obs_files:
            # this file creates easily 100k commits and is just useless data :(
            # unfortunately it's stored in the same meta package as the project config
            if revision.package == "_project" and name == "_staging_workflow":
                continue
            # have such files been detected as text mimetype before?
            is_text = self.proxy_sha256.is_text(name)
            if not is_text and is_binary_or_large(name, size):
                file_sha256 = self.proxy_sha256.get_or_put(
                    revision.project,
                    revision.package,
                    name,
                    revision.srcmd5,
                    file_md5,
                    size,
                )
                self.git.add_lfs(name, file_sha256["sha256"], size)
            else:
                if (name, size, file_md5) not in git_files:
                    print(f"Download {name}")
                    self.obs.download(
                        revision.project,
                        revision.package,
                        name,
                        revision.srcmd5,
                        self.git.path,
                    )
                    # Validate the MD5 of the downloaded file
                    if md5(self.git.path / name) != file_md5:
                        raise Exception(f"Download error in {name}")
                self.git.add(name)

        # Remove extra files
        obs_names = {n for (n, _, _) in obs_files}
        git_names = {n for (n, _, _) in git_files}
        for name in git_names - obs_names:
            print(f"Remove {name}")
            self.git.remove(name)

    def update_db_package(self, db, project, package):
        """Store the OBS revisions of ``project/package`` newer than the DB.

        Every revision with a number higher than the latest one already
        in the database is imported. If the revision carries a link, the
        link target is recorded; a link that cannot be parsed marks the
        revision as broken. The transaction is committed at the end.
        """
        root = self.obs._history(project, package)
        if root is None:
            return
        latest = DBRevision.latest_revision(db, project, package)
        for r in root.findall("revision"):
            rev = OBSRevision(self.obs, self, project, package).parse(r)
            if not latest or rev.rev > latest.rev:
                dbrev = DBRevision.import_obs_rev(db, rev)
                try:
                    link = rev.read_link()
                except ET.ParseError:
                    dbrev.set_broken(db)
                    continue
                if link is not None:
                    # A link without explicit target points to the same
                    # project and/or package
                    tprj = link.get("project") or project
                    tpkg = link.get("package") or package
                    dbrev.links_to(db, tprj, tpkg)
        db.conn.commit()

    def import_into_db(self):
        """Mirror revisions, link targets, users and requests into the DB.

        For each configured project the package revisions are imported,
        followed by the revisions of every package they link to, the
        users still missing in the database, the directory listing of
        the revisions not expanded yet, and the requests to fetch.
        """
        db = DB()
        for project, _, api_url in self.projects:
            self.obs.change_url(api_url)
            self.update_db_package(db, project, self.package)
            with db.cursor() as cur:
                cur.execute(
                    "SELECT DISTINCT l.project, l.package from links l join revisions r on r.id=l.revision_id WHERE r.project=%s AND r.package=%s",
                    (project, self.package),
                )
                for row in cur.fetchall():
                    (lproject, lpackage) = row
                    self.update_db_package(db, lproject, lpackage)

            missing_users = User.missing_users(db)
            for userid in missing_users:
                missing_user = self.obs.user(userid)
                if missing_user:
                    missing_user.import_into_db(db)

            for rev in DBRevision.all_revisions(db, project, self.package):
                # TODO move into SELECT
                if rev.broken or rev.expanded_srcmd5:
                    continue
                linked_rev = rev.linked_rev(db)
                if linked_rev:
                    linked_rev = linked_rev.unexpanded_srcmd5
                dir_list = self.obs.list(
                    project, self.package, rev.unexpanded_srcmd5, linked_rev
                )
                if dir_list:
                    rev.import_dir_list(db, dir_list)
                else:
                    rev.set_broken(db)

            for number in DBRevision.requests_to_fetch(db, project, self.package):
                self.obs.request(number).import_into_db(db)

        db.conn.commit()

    def import_all_revisions(self, gc):
        """Import every known revision, in the order chosen by the history.

        Args:
            gc: number of revisions between two ``git gc`` runs; a falsy
                value disables the garbage collection.
        """
        # Fetch all the requests and sort them. Ideally we should
        # build the graph here, to avoid new commits before the merge.
        # For now we will sort them and invalidate the commits if
        # "rebase_devel" is set.
        self.history.fetch_all_revisions(self.projects)
        revisions = self.history.sort_all_revisions()

        logging.debug(f"Selected import order for {self.package}")
        for revision in revisions:
            logging.debug(revision)

        gc_cnt = gc
        for revision in revisions:
            gc_cnt -= 1
            if gc_cnt <= 0 and gc:
                self.git.gc()
                gc_cnt = gc
            self.import_revision(revision)

    def import_new_revision_with_request(self, revision, request):
        """Create a new branch as a result of a merge

        The branch of the revision's project is created on the commit of
        the revision submitted by ``request`` and checked out, and that
        commit is recorded as the commit of ``revision``.

        Returns:
            False when the request does not connect to a known revision,
            None otherwise.
        """
        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False

        if not submitted_revision.commit:
            # If the revision appointed by the request is not part of
            # the git history, we can have an ordering problem. One
            # example is "premake4".
            self.import_revision(submitted_revision)

        assert submitted_revision.commit is not None

        project = revision.project
        branch, _ = self.projects_info[project]

        # TODO: add an empty commit marking the acceptance of the request (see discussion in PR 2858)
        self.git.branch(branch, submitted_revision.commit)
        self.git.clean()
        self.git.checkout(branch)

        logging.info(f"Create new branch based on {submitted_revision.commit}")
        revision.commit = submitted_revision.commit

    def _rebase_branch_history(self, project, revision):
        """Re-import the committed revisions that follow ``revision``.

        Walks the history of ``project`` after ``revision``, resetting the
        commit of each revision and importing it again, and stops at the
        first revision that has no commit.
        """
        branch, _ = self.projects_info[project]
        history = self.history[project]
        revision_index = history.index(revision)
        for index in range(revision_index + 1, len(history)):
            revision = history[index]
            # We are done when we have one non-committed revision
            if not revision.commit:
                return
            logging.info(f"Rebasing {revision} from {branch}")
            revision.commit = None
            self.import_revision(revision)

    def import_revision_with_request(self, revision, request):
        """Import a single revision via a merge

        Returns:
            True when a merge commit was created and stored in
            ``revision.commit``. False when the request does not connect
            to a known revision, when the submitted revision is an
            "EMPTY" commit, or when the merge itself is empty.

        Raises:
            AssertionError: if the submitted revision has no commit, or
                if the merge is still in conflict after the download.
        """
        submitted_revision = self.history.find_revision(
            request.source, request.revisionid, revision.time
        )
        if not submitted_revision:
            logging.warning(f"Request {request} does not connect to a known revision")
            return False
        assert submitted_revision.commit is not None

        # TODO: detect a revision, case in point
        #    Base:System/bash/284 -> rq683701 -> accept O:F/151
        #    -> autocommit Base:System/bash/285
        #    Revert lead to openSUSE:Factory/bash/152
        #    Base:System/286 restored the reverted code in devel project
        #    rq684575 was created and accepted as O:F/153
        #    But the 284-285 and the 285-286 changeset is seen as empty
        #    as the revert was never in Base:System, so the
        #    submitted_revision of 684575 has no commit
        if submitted_revision.commit == "EMPTY":
            logging.warning("Empty commit submitted?!")
            return False

        message = (
            f"Accepting request {revision.requestid}: {revision.comment}\n\n{revision}"
        )
        commit = self.git.merge(
            # TODO: revision.userid or request.creator?
            f"OBS User {revision.userid}",
            "null@suse.de",
            revision.time,
            message,
            submitted_revision.commit,
        )

        if commit == "EMPTY":
            logging.warning("Empty merge. Ignoring the revision and the request")
            self.git.merge_abort()
            revision.commit = commit
            return False

        if commit == "CONFLICT":
            # Resolve the conflict by taking the content of the revision
            # as it is in OBS, and merge again on top of it
            logging.info("Merge conflict. Downloading revision")
            self.download(revision)
            message = f"CONFLICT {message}"
            commit = self.git.merge(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                message,
                submitted_revision.commit,
                merged=True,
            )

        assert commit and commit != "CONFLICT"
        logging.info(f"Merge with {submitted_revision.commit} into {commit}")
        revision.commit = commit

        # TODO: There are more checks to do, like for example, the
        # last commit into the non-devel branch should be a merge from
        # the devel branch
        if self.rebase_devel:
            branch, _ = self.projects_info.get(request.source, (None, None))
            if branch == "devel":
                self.git.repo.references[f"refs/heads/{branch}"].set_target(commit)
                self._rebase_branch_history(request.source, submitted_revision)

        return True

    def matching_request(self, revision):
        """Return the request of ``revision`` if it must be handled, else None.

        Requests that are not of type "submit" are returned untouched, to
        be handled by the caller. A submit request is returned only when
        its source is one of the exported projects, its target is the
        project of the revision, and source and target differ.
        """
        request = self.obs.request(revision.requestid)
        if not request:
            return None

        # to be handled by the caller
        if request.type() != "submit":
            return request

        if request.source not in self.projects_info:
            logging.info("Request from a non exported project")
            return None

        if request.target != revision.project:
            # This seems to happen when the devel project gets
            # reinitialized (for example, SR#943593 in 7zip, or
            # SR#437901 in ColorFull)
            logging.info("Request target different from current project")
            return None

        if request.source == request.target:
            # this is not a merge, but a different way to do a
            # contribution to the (devel) project - see bindfs's rev 1
            logging.info("Request within the same project")
            return None

        return request

    def import_revision(self, revision):
        """Import a single revision into git

        The revision ends up either with ``revision.commit`` set or with
        ``revision.ignored`` set to True (broken revisions, autocommits,
        delete requests and revisions without changes are ignored).
        """
        project = revision.project
        branch, api_url = self.projects_info[project]

        logging.info(f"Importing [{revision}] to {branch}")

        self.obs.change_url(api_url)

        # Populate linkrev and replace srcmd5 from the linked
        # revision. If the expansion fails, the revision will be ignored
        # and not imported.
        if not revision.check_expanded():
            logging.warning("Broken revision")
            revision.ignored = True
            return

        # When doing a SR, we see also a revision in the origin
        # project with the outgoing request, but without changes in
        # the project. We can ignore them.
        #
        # If there is a request ID, it will be filtered out later,
        # when the target project is different from itself.
        if revision.userid == "autobuild" and not revision.requestid:
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        if revision.userid == "buildservice-autocommit":
            logging.info("Ignoring autocommit")
            revision.ignored = True
            return

        # Create the reference if the branch is new. If so return
        # True.
        new_branch = self.git.checkout(branch)

        if revision.requestid:
            request = self.matching_request(revision)
            if request:
                if request.type() == "delete":
                    # TODO: after this comes a restore, this should be collapsed
                    # before even hitting git
                    logging.info("Delete request ignored")
                    revision.ignored = True
                    return

                logging.debug(f"Found matching request: #{revision.project} #{request}")
                if new_branch:
                    self.import_new_revision_with_request(revision, request)
                    return
                if self.import_revision_with_request(revision, request):
                    return

        # Import revision as a single commit (without merging)
        self.download(revision)

        if new_branch or self.git.is_dirty():
            commit = self.git.commit(
                f"OBS User {revision.userid}",
                "null@suse.de",
                revision.time,
                # TODO: Normalize better the commit message
                f"{revision.comment}\n\n{revision}",
                # Create an empty commit only if is a new branch
                allow_empty=new_branch,
            )
            revision.commit = commit
            logging.info(f"Commit {commit}")
        else:
            logging.info("Skip empty commit")
            revision.ignored = True
|