forked from adamm/git-importer
ed4b7367eb
This happens in packages that change their devel project over time. In that case, the commit in the devel project no longer has its parent in the devel branch but is based on factory instead.
143 lines
5.0 KiB
Python
143 lines
5.0 KiB
Python
import logging
|
|
import os
|
|
|
|
import yaml
|
|
|
|
from lib.binary import is_binary_or_large
|
|
from lib.db import DB
|
|
from lib.git import Git
|
|
from lib.obs import OBS
|
|
from lib.proxy_sha256 import ProxySHA256, md5
|
|
from lib.tree_builder import TreeBuilder
|
|
|
|
|
|
class GitExporter:
    """Export the OBS history of one package into a git repository.

    The package history is obtained as a flat list from ``TreeBuilder`` and
    replayed commit by commit onto the ``factory`` and ``devel`` branches.
    The database id of the last exported commit of each branch is stored in
    a YAML state file inside the ``.git`` directory, so that a later run
    only commits what is still missing.
    """

    # Branches whose heads are tracked in the state file, in lookup order.
    _BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Set up the OBS connection and create the target repository.

        Args:
            api_url: URL handed to ``OBS.change_url``.
            project: Project name passed to ``TreeBuilder.build``.
            package: Package name passed to ``TreeBuilder.build``.
            repodir: Location handed to ``Git`` for the repository.
            cachedir: Cache directory handed to ``OBS.download``.
        """
        self.obs = OBS()
        self.project = project
        self.package = package
        # TODO: Store the api url in the revision
        self.obs.change_url(api_url)
        self.proxy_sha256 = ProxySHA256(self.obs, enabled=True)
        self.git = Git(
            repodir,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        ).create()
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set how many commits are made between two ``git gc`` runs.

        A falsy value disables the periodic gc inside the commit loop.
        """
        self.gc_interval = gc

    def export_as_git(self):
        """Commit every flat that is not yet recorded in the state file."""
        db = DB()
        tree = TreeBuilder(db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = dict.fromkeys(self._BRANCHES)
        left_to_commit = self._pending_flats(flats, self._load_state(), branch_state)

        if left_to_commit:
            self.git.gc()
        gc_cnt = self.gc_interval
        for flat in left_to_commit:
            gc_cnt -= 1
            if gc_cnt <= 0 and self.gc_interval:
                self.git.gc()
                gc_cnt = self.gc_interval
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

    def _load_state(self):
        """Return the branch -> dbid mapping from the state file.

        A missing file, or content that is not a mapping (for example an
        empty file), yields an empty dict.
        """
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                state_data = yaml.safe_load(f)
        except FileNotFoundError:
            return {}
        return state_data if isinstance(state_data, dict) else {}

    def _pending_flats(self, flats, state_data, branch_state):
        """Return the flats that still have to be committed.

        ``flats`` is walked in reverse order.  Whenever a flat matches the
        dbid recorded for a branch, that branch's entry in ``branch_state``
        is set to the flat's commit, the commit is linked to the current
        git head of the branch, and everything collected so far is dropped.
        """
        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self._BRANCHES:
                if flat.commit.dbid != state_data.get(branch):
                    continue
                branch_state[branch] = flat.commit
                flat.commit.git_commit = self.git.branch_head(branch)
                logging.debug(
                    f"Found {self.git.path}'s {branch} branch in state {flat}"
                )
                left_to_commit = []
                found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def commit_file(self, flat, file, size, md5):
        """Stage one file of ``flat``'s commit in the git working tree.

        Files that were not previously seen as text and that
        ``is_binary_or_large`` flags are added through ``git.add_lfs`` using
        the sha256 obtained from the sha256 proxy; all other files are
        downloaded from OBS into the repository and added normally.
        """
        # have such files been detected as text mimetype before?
        is_text = self.proxy_sha256.is_text(file.name)
        if not is_text and is_binary_or_large(file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            self.git.add_lfs(file.name, file_sha256["sha256"], size)
        else:
            self.obs.download(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                self.git.path,
                self.cachedir,
                file_md5=md5,
            )
            self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return whether ``flat.parent1`` is the current state of its branch."""
        current = branch_state[flat.branch]
        if current is None:
            # everything fits nothing
            return True
        return flat.parent1 == current

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and record the new state.

        If the branch does not currently sit on ``flat.parent1`` it is reset
        onto that parent first.  After committing, ``branch_state`` is
        updated and the state file is rewritten.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.info(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            # NOTE(review): parent2 is only considered together with parent1,
            # so it can never end up as the first parent - confirm.
            if flat.parent2:
                parents.append(flat.parent2.git_commit)

        # Overwrite ".gitattributes" with the default LFS attributes
        self.git.add_default_lfs_gitattributes(force=True)

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        git_commit = self.git.commit(
            f"OBS User {flat.commit.userid}",
            "null@suse.de",
            flat.commit.commit_time,
            # TODO: Normalize better the commit message
            f"{flat.commit.comment}\n\n{flat.commit}",
            allow_empty=True,
            parents=parents,
        )
        flat.commit.git_commit = git_commit
        branch_state[flat.branch] = flat.commit
        self._save_state(branch_state)

    def _save_state(self, branch_state):
        """Persist the dbid of each branch's current commit to the state file.

        The data is written to a temporary file next to the state file and
        then moved into place, so an interruption cannot leave a truncated
        state file behind.
        """
        data = {
            branch: branch_state[branch].dbid
            for branch in self._BRANCHES
            if branch_state[branch]
        }
        tmp_file = f"{self.state_file}.tmp"
        with open(tmp_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)
        os.replace(tmp_file, self.state_file)
|