git-importer/lib/git_exporter.py
Stephan Kulow ed4b7367eb Reset branch if the devel branch is based on Factory
This happens in packages that change their devel project over time. Then
the commit in the devel project no longer has the parent in the devel branch
but is based on factory
2022-11-03 15:12:07 +01:00

143 lines
5.0 KiB
Python

import logging
import os
import yaml
from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256, md5
from lib.tree_builder import TreeBuilder
class GitExporter:
    """Replay the flattened OBS history of one package into a git repository.

    The exporter remembers, per tracked branch, the database id of the last
    revision it committed in ``.git/_flat_state.yaml`` so that a later run
    only commits what is still missing.
    """

    # Branches tracked in the state file, in the order they are examined.
    _BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Set up the OBS client, the sha256 proxy and the git repository.

        Args:
            api_url: URL handed to ``OBS.change_url``.
            project: Project name, later passed to ``TreeBuilder.build``.
            package: Package name, later passed to ``TreeBuilder.build``.
            repodir: Location handed to ``Git``; ``create()`` is called on it.
            cachedir: Cache directory handed to ``OBS.download``.
        """
        self.obs = OBS()
        self.project = project
        self.package = package
        # TODO: Store the api url in the revision
        self.obs.change_url(api_url)
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set how many commits are made between two ``git gc`` runs.

        A falsy value (``0`` or ``None``) disables the periodic gc.
        """
        self.gc_interval = gc

    def _load_state(self):
        """Return the branch -> dbid mapping saved by a previous run.

        Returns an empty dict when the state file is missing or does not
        contain a YAML mapping (e.g. it is empty).
        """
        if not os.path.exists(self.state_file):
            return {}
        with open(self.state_file, "r", encoding="utf-8") as f:
            state_data = yaml.safe_load(f)
        return state_data if isinstance(state_data, dict) else {}

    def export_as_git(self):
        """Commit every flat revision that is not yet in the git repository.

        The flat list is walked in reverse. Whenever a revision matches the
        dbid saved for a branch, that branch is considered up to date at this
        point and everything collected so far is dropped; only the revisions
        after the last such match are committed.
        """
        db = DB()
        tree = TreeBuilder(db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = dict.fromkeys(self._BRANCHES)
        state_data = self._load_state()

        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self._BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything seen before this point is already exported.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)

        # One gc up front whenever there is work to do.
        if left_to_commit:
            self.git.gc()

        # Count upwards and compare only when an interval is configured, so
        # that a disabled interval (0 or None) never takes part in arithmetic.
        commits_since_gc = 0
        for flat in left_to_commit:
            commits_since_gc += 1
            if self.gc_interval and commits_since_gc >= self.gc_interval:
                self.git.gc()
                commits_since_gc = 0
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

    def commit_file(self, flat, file, size, md5):
        """Add one file of ``flat.commit`` to the git repository.

        Files that were not previously detected as text and that
        ``is_binary_or_large`` flags are registered through ``Git.add_lfs``
        using the sha256 obtained from the proxy; all other files are
        downloaded into the git working directory and added normally.

        Args:
            flat: Flat revision the file belongs to.
            file: File object; its ``name`` attribute is used.
            size: File size as reported by the revision.
            md5: MD5 checksum of the file.
        """
        # have such files been detected as text mimetype before?
        is_text = self.proxy_sha256.is_text(file.name)
        if not is_text and is_binary_or_large(file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            self.git.add_lfs(file.name, file_sha256["sha256"], size)
        else:
            self.obs.download(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                self.git.path,
                self.cachedir,
                file_md5=md5,
            )
            self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Tell whether ``flat`` can be committed on top of its branch as is.

        Returns ``True`` when nothing is recorded for the branch yet or when
        the recorded revision equals ``flat.parent1``.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and persist the branch state.

        Args:
            flat: Flat revision to commit; ``parent1``/``parent2`` become the
                git parents when set.
            branch_state: Mapping branch name -> last committed revision;
                updated in place with ``flat.commit``.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                # The branch head is not the parent of this revision (e.g. the
                # devel project changed), so move the branch onto the parent.
                logging.info(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
        if flat.parent2:
            parents.append(flat.parent2.git_commit)

        # Overwrite ".gitattributes" (presumably with the default LFS rules)
        self.git.add_default_lfs_gitattributes(force=True)

        # NOTE(review): after a reset above, the delta is still computed
        # against the previous branch state rather than flat.parent1 - confirm
        # that this matches what the checkout leaves in the working tree.
        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        commit = self.git.commit(
            f"OBS User {flat.commit.userid}",
            "null@suse.de",
            flat.commit.commit_time,
            # TODO: Normalize better the commit message
            f"{flat.commit.comment}\n\n{flat.commit}",
            allow_empty=True,
            parents=parents,
        )
        flat.commit.git_commit = commit
        branch_state[flat.branch] = flat.commit

        # Build the mapping first so a failure here cannot leave a truncated
        # state file behind.
        data = {
            branch: branch_state[branch].dbid
            for branch in self._BRANCHES
            if branch_state[branch]
        }
        with open(self.state_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)