forked from importers/git-importer
7861a7e9b0
Refactored the LFS Oid handling into a class of its own and added a way to recheck (or re-register) all LFS handles
176 lines
6.0 KiB
Python
176 lines
6.0 KiB
Python
import logging
|
|
import os
|
|
|
|
import yaml
|
|
|
|
from lib.binary import is_binary_or_large
|
|
from lib.db import DB
|
|
from lib.git import Git
|
|
from lib.lfs_oid import LFSOid
|
|
from lib.obs import OBS
|
|
from lib.proxy_sha256 import ProxySHA256
|
|
from lib.tree_builder import TreeBuilder
|
|
from lib.user import User
|
|
|
|
|
|
class GitExporter:
    """Export the flattened OBS history of one package into a git repository.

    After every git commit, the database id of the commit each branch
    currently points at is stored in a small YAML state file inside the
    repository's ``.git`` directory, so a later run can resume where the
    previous one stopped.
    """

    # Branches tracked in the state file and in ``branch_state`` mappings.
    BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Open (or create) the git repository for *package*.

        Args:
            api_url: URL handed to the ``OBS`` client.
            project: project name passed to ``TreeBuilder.build``.
            package: package name; also the repository directory name.
            repodir: parent directory of the repository; combined with
                *package* through the ``/`` operator.
            cachedir: cache directory forwarded to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            self.git.create()
            # NOTE(review): assumed to belong to the "create" branch only
            # (a freshly created repository has no remote yet) - confirm.
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Countdown until the next gc; (re)armed by run_gc().
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set the number of commits between two ``git gc`` runs.

        A falsy value (``0`` or ``None``) disables the periodic gc inside
        the commit loop of ``export_as_git``.
        """
        self.gc_interval = gc

    def _read_state(self):
        """Return the mapping stored in the state file.

        Returns an empty dict when the file does not exist or when its
        content does not parse to a mapping (e.g. an empty file).
        """
        if not os.path.exists(self.state_file):
            return {}
        with open(self.state_file, "r", encoding="utf-8") as f:
            state_data = yaml.safe_load(f)
        return state_data if isinstance(state_data, dict) else {}

    def _write_state(self, branch_state):
        """Persist the database id of each branch's current commit.

        The payload is built before any file is touched and the file is
        replaced in one step, so an error or interruption cannot leave a
        truncated state file behind.
        """
        data = {
            branch: branch_state[branch].dbid
            for branch in self.BRANCHES
            if branch_state[branch]
        }
        tmp_file = f"{self.state_file}.tmp"
        with open(tmp_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)
        os.replace(tmp_file, self.state_file)

    def check_repo_state(self, flats, branch_state):
        """Work out which flattened commits still have to be committed.

        *flats* is walked in reverse order. Whenever a commit's database
        id equals the id recorded for a branch in the state file, that
        commit is stored in ``branch_state[branch]``, its ``git_commit``
        is set to the branch's current head, and everything collected so
        far is discarded.

        Args:
            flats: sequence of flattened commits (each with ``.commit``).
            branch_state: mapping branch name -> commit; updated in place.

        Returns:
            The flats following the last recorded commit, in
            ``reversed(flats)`` order.
        """
        state_data = self._read_state()
        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self.BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything seen before this point is already in git.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def export_as_git(self):
        """Commit all not yet exported history into git and push it.

        When the ``CHECK_ALL_LFS`` environment variable is set to a
        non-empty value, ``LFSOid.check_all`` is run for the package
        first. Returns without committing or pushing when nothing is
        left to commit.
        """
        if os.getenv("CHECK_ALL_LFS"):
            LFSOid.check_all(self.db, self.package)
        tree = TreeBuilder(self.db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = dict.fromkeys(self.BRANCHES)
        left_to_commit = self.check_repo_state(flats, branch_state)

        if not left_to_commit:
            return

        logging.info(f"Commiting into {self.git.path}")
        self.run_gc()
        # Cache of user lookups, keyed by user id.
        users = {}

        for flat in left_to_commit:
            userid = flat.commit.userid
            if userid not in users:
                users[userid] = User.find(self.db, userid)
            flat.user = users[userid]
            self.gc_cnt -= 1
            if self.gc_interval and self.gc_cnt <= 0:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

        self.git.push()

    def run_gc(self):
        """Run ``git gc`` and re-arm the countdown to the next run."""
        # ``or 0`` keeps the countdown numeric when the interval is None.
        self.gc_cnt = self.gc_interval or 0
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return True when the file should be handled as an LFS object.

        That is the case when ``is_binary_or_large`` accepts it and
        ``proxy_sha256`` does not report it as text.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Add *file* to the index, as an LFS pointer or as regular file."""
        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download *file* from OBS into the work tree and ``git add`` it."""
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return True when the branch can take *flat* without a reset.

        True when nothing is recorded for ``flat.branch`` yet, or when
        the recorded commit equals ``flat.parent1``.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for *flat* and record the new state.

        Checks out ``flat.branch`` (resetting it onto ``flat.parent1``
        when the branch state does not match), applies the file delta,
        commits with the parents collected from *flat*, updates
        ``branch_state`` in place and rewrites the state file.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
            if flat.parent2:
                assert flat.parent2.git_commit
                parents.append(flat.parent2.git_commit)

        # create the file if it does not exist yet
        self.git.add_default_lfs_gitattributes(force=False)

        # NOTE(review): after a reset the delta is still computed against
        # the previous branch state, not against parent1 - confirm intended.
        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        flat.commit.git_commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        branch_state[flat.branch] = flat.commit
        self._write_state(branch_state)
|