forked from adamm/git-importer
7b20c03256
As devel branches can change in case of factory reverts, we need to force push. The factory branch shouldn't be affected, so we are not force pushing there.
176 lines
6.0 KiB
Python
176 lines
6.0 KiB
Python
import logging
|
|
import os
|
|
|
|
import yaml
|
|
|
|
from lib.binary import is_binary_or_large
|
|
from lib.db import DB
|
|
from lib.git import Git
|
|
from lib.lfs_oid import LFSOid
|
|
from lib.obs import OBS
|
|
from lib.proxy_sha256 import ProxySHA256
|
|
from lib.tree_builder import TreeBuilder
|
|
from lib.user import User
|
|
|
|
|
|
class GitExporter:
    """Export the flattened revision history of one package into a git repo.

    The exporter builds the revision tree of ``project``/``package``, works
    out which revisions are already present in the local repository (using a
    small YAML state file stored inside ``.git``), commits the missing ones
    and finally pushes the result.
    """

    # Branches tracked in the state file, in the order they are checked.
    _BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Open (or create) the git repository for ``package``.

        Args:
            api_url: URL handed to the ``OBS`` client.
            project: Project name used when building the revision tree.
            package: Package name; also the repository directory name.
            repodir: Base directory; must support ``repodir / package``.
            cachedir: Cache directory passed on to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            self.git.create()
            # NOTE(review): assumed to belong to the "freshly created" branch
            # (a remote is only added once) - confirm against the original
            # layout.
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Commits left until the next gc; run_gc() resets it. Initialised
        # here so the attribute exists even before the first run_gc() call.
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set how many commits are made between two ``git gc`` runs.

        A falsy value disables the periodic gc inside the commit loop.
        """
        self.gc_interval = gc

    def check_repo_state(self, flats, branch_state):
        """Work out which flats still have to be committed.

        Reads the state file (if present) and walks ``flats`` in reverse
        order. Whenever a flat's commit id matches the id recorded for a
        branch, ``branch_state`` is updated in place, the commit is linked to
        the current branch head, and everything collected so far is dropped.

        Args:
            flats: Flat list of revisions; each item exposes ``.commit``.
            branch_state: Dict keyed by branch name, mutated in place.

        Returns:
            The flats (in reversed order) that come after the last recorded
            state, i.e. those not yet committed.
        """
        state_data = {}
        if os.path.exists(self.state_file):
            with open(self.state_file, "r") as f:
                state_data = yaml.safe_load(f)
            # An empty or malformed state file does not yield a mapping.
            if not isinstance(state_data, dict):
                state_data = {}

        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self._BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything seen before this point is already in git.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def export_as_git(self):
        """Commit all revisions missing from the repository and push them."""
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = {branch: None for branch in self._BRANCHES}
        left_to_commit = self.check_repo_state(flats, branch_state)

        if not left_to_commit:
            return

        logging.info(f"Commiting into {self.git.path}")
        self.run_gc()
        # Cache of user lookups, keyed by userid.
        users = {}

        for flat in left_to_commit:
            userid = flat.commit.userid
            if userid not in users:
                users[userid] = User.find(self.db, userid)
            flat.user = users[userid]
            self.gc_cnt -= 1
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

        self.git.push(force=True)

    def run_gc(self):
        """Reset the gc countdown and run garbage collection on the repo."""
        self.gc_cnt = self.gc_interval
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return True if the file should be stored via LFS.

        A file qualifies when ``is_binary_or_large`` accepts it and it has
        not been registered as text in the sha256 proxy.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Add one file of ``flat`` to the index, as LFS pointer or content."""
        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download the file into the working tree and add it to the index."""
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return True if the branch currently sits on ``flat.parent1``.

        A branch with no recorded state fits any parent.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and record the new state.

        Checks out the flat's branch (resetting it onto ``parent1`` when the
        branch head does not match), applies the file delta, commits, updates
        ``branch_state`` in place and rewrites the state file.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            # NOTE(review): a second parent is only considered when a first
            # parent exists - confirm this nesting against the original
            # layout.
            if flat.parent2:
                assert flat.parent2.git_commit
                parents.append(flat.parent2.git_commit)

        # create the file if it does not exist yet
        self.git.add_default_lfs_gitattributes(force=False)

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        git_commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        flat.commit.git_commit = git_commit
        branch_state[flat.branch] = flat.commit
        self._save_state(branch_state)

    def _save_state(self, branch_state):
        """Write the commit id of every populated branch to the state file."""
        # Build the mapping first so the file is only truncated once the
        # data to write is complete.
        data = {
            branch: branch_state[branch].dbid
            for branch in self._BRANCHES
            if branch_state[branch]
        }
        with open(self.state_file, "w") as f:
            yaml.dump(data, f)
|