import logging
import os

import yaml

from lib.binary import is_binary_or_large
from lib.db import DB
from lib.git import Git
from lib.obs import OBS
from lib.proxy_sha256 import ProxySHA256
from lib.tree_builder import TreeBuilder
from lib.user import User


class GitExporter:
    """Export the history of one OBS package into a git repository.

    The exporter builds a flat list of commits from the database, works out
    which of them are already present in the git repository (using a small
    YAML state file kept inside ``.git``) and commits the remaining ones.
    """

    # Branches whose heads are tracked in the state file.
    BRANCHES = ("factory", "devel")

    def __init__(self, api_url, project, package, repodir, cachedir):
        """Set up the OBS/DB helpers and open (or create) the git repository.

        Args:
            api_url: URL handed to the ``OBS`` client.
            project: Project name used when building the commit tree.
            package: Package name; also the repository directory name
                below ``repodir``.
            repodir: Parent directory of the repository. It is joined with
                ``/``, so presumably a ``pathlib.Path`` (verify against
                the caller).
            cachedir: Cache directory forwarded to ``OBS.download``.
        """
        self.obs = OBS(api_url)
        self.project = project
        self.package = package
        self.db = DB()
        self.proxy_sha256 = ProxySHA256(self.obs, self.db)
        self.git = Git(
            repodir / package,
            committer="Git OBS Bridge",
            committer_email="obsbridge@suse.de",
        )
        if self.git.exists():
            self.git.open()
        else:
            self.git.create()
            self.git.add_gitea_remote(package)
        self.state_file = os.path.join(self.git.path, ".git", "_flat_state.yaml")
        self.gc_interval = 200
        # Commits left until the next garbage collection. Initialised here so
        # the attribute exists even before run_gc() has been called.
        self.gc_cnt = self.gc_interval
        self.cachedir = cachedir

    def set_gc_interval(self, gc):
        """Set the number of commits between two ``git gc`` runs.

        A falsy value (e.g. ``0``) disables the periodic collection inside
        the commit loop of ``export_as_git``.
        """
        self.gc_interval = gc

    def check_repo_state(self, flats, branch_state):
        """Determine which flat commits still have to be written to git.

        The state file maps branch names to the database id of the commit
        last exported on that branch. ``flats`` is walked in reverse order;
        whenever a flat matches a stored id, everything collected so far is
        considered already exported and is dropped.

        Args:
            flats: Flat commit list as returned by the tree's
                ``as_flat_list()``.
            branch_state: Dict keyed by branch name. It is updated in place
                with the commit found for each branch.

        Returns:
            The flats (in reversed ``flats`` order) that come after the
            last one found in the state file.
        """
        state_data = {}
        if os.path.exists(self.state_file):
            with open(self.state_file, encoding="utf-8") as f:
                state_data = yaml.safe_load(f)
            # An empty or malformed state file does not yield a mapping.
            if not isinstance(state_data, dict):
                state_data = {}

        left_to_commit = []
        for flat in reversed(flats):
            found_state = False
            for branch in self.BRANCHES:
                if flat.commit.dbid == state_data.get(branch):
                    branch_state[branch] = flat.commit
                    flat.commit.git_commit = self.git.branch_head(branch)
                    logging.debug(
                        f"Found {self.git.path}'s {branch} branch in state {flat}"
                    )
                    # Everything collected up to here is already in git.
                    left_to_commit = []
                    found_state = True
            if not found_state:
                left_to_commit.append(flat)
        return left_to_commit

    def export_as_git(self):
        """Commit all not yet exported revisions and push the result.

        Returns early, without pushing, when the repository is already up
        to date according to the state file.
        """
        tree = TreeBuilder(self.db).build(self.project, self.package)
        flats = tree.as_flat_list()

        branch_state = {branch: None for branch in self.BRANCHES}
        left_to_commit = self.check_repo_state(flats, branch_state)

        if not left_to_commit:
            return

        logging.info(f"Commiting into {self.git.path}")
        self.run_gc()
        users = {}  # userid -> User, so each user is looked up only once

        for flat in left_to_commit:
            userid = flat.commit.userid
            if userid not in users:
                users[userid] = User.find(self.db, userid)
            flat.user = users[userid]
            self.gc_cnt -= 1
            if self.gc_cnt <= 0 and self.gc_interval:
                self.run_gc()
            logging.debug(f"Committing {flat}")
            self.commit_flat(flat, branch_state)

        self.git.push()

    def run_gc(self):
        """Reset the commit countdown and run ``gc`` on the repository."""
        self.gc_cnt = self.gc_interval
        self.git.gc()

    def is_lfs_file(self, package, filename, size):
        """Return whether the file should be stored via git LFS.

        A file qualifies when ``is_binary_or_large`` accepts it and the
        sha256 proxy does not report it as text.
        """
        if not is_binary_or_large(filename, size):
            return False
        return not self.proxy_sha256.is_text(package, filename)

    def commit_file(self, flat, file, size, md5):
        """Add one file of ``flat`` to the index, as LFS pointer or as content.

        Args:
            flat: Flat commit entry the file belongs to.
            file: File object; its ``name`` attribute is used for lookups.
            size: File size passed to the LFS checks.
            md5: MD5 sum of the file, used for the sha256 lookup and the
                download.
        """
        # have such files been detected as text mimetype before?
        if self.is_lfs_file(flat.commit.package, file.name, size):
            file_sha256 = self.proxy_sha256.get_or_put(
                flat.commit.project,
                flat.commit.package,
                file.name,
                flat.commit.expanded_srcmd5,
                md5,
                size,
            )
            # as it's newly registered, it might be a text file now, so double check
            if not self.proxy_sha256.is_text(flat.commit.package, file.name):
                self.git.add_lfs(file.name, file_sha256, size)
                return
        self.commit_non_lfs_file(flat, file, md5)

    def commit_non_lfs_file(self, flat, file, md5):
        """Download the file from OBS into the work tree and ``git add`` it."""
        self.obs.change_url(flat.commit.api_url)
        self.obs.download(
            flat.commit.project,
            flat.commit.package,
            file.name,
            flat.commit.expanded_srcmd5,
            self.git.path,
            self.cachedir,
            file_md5=md5,
        )
        self.git.add(file)

    def branch_fits_parent1(self, flat, branch_state):
        """Return whether the branch head recorded for ``flat`` is its first parent.

        A branch without any recorded commit fits every parent.
        """
        if branch_state[flat.branch] is None:
            # everything fits nothing
            return True
        return flat.parent1 == branch_state[flat.branch]

    def commit_flat(self, flat, branch_state):
        """Create the git commit for ``flat`` and persist the new branch state.

        Args:
            flat: Flat commit entry to export. ``flat.user`` must already be
                set, since its ``realname`` and ``email`` are passed to
                ``git.commit``.
            branch_state: Dict keyed by branch name holding the commit last
                exported per branch. It is updated in place.
        """
        parents = []
        self.git.checkout(flat.branch)

        if flat.parent1:
            if not self.branch_fits_parent1(flat, branch_state):
                logging.debug(f"Reset {flat.branch} onto {flat.parent1.short_string()}")
                assert flat.parent1.git_commit
                self.git.set_branch_head(flat.branch, flat.parent1.git_commit)
                self.git.checkout(flat.branch)
            parents.append(flat.parent1.git_commit)
        if flat.parent2:
            assert flat.parent2.git_commit
            parents.append(flat.parent2.git_commit)

        # create file if not existent
        self.git.add_default_lfs_gitattributes(force=False)

        to_download, to_delete = flat.commit.calc_delta(branch_state[flat.branch])
        for file in to_delete:
            self.git.remove(file)
        for file, size, md5 in to_download:
            self.commit_file(flat, file, size, md5)

        commit = self.git.commit(
            flat.user.realname,
            flat.user.email,
            flat.commit.commit_time,
            flat.commit.git_commit_message(),
            parents=parents,
        )
        flat.commit.git_commit = commit
        branch_state[flat.branch] = flat.commit

        # Build the payload before opening the file, so a failure while
        # collecting it cannot leave a truncated state file behind.
        data = {
            branch: branch_state[branch].dbid
            for branch in self.BRANCHES
            if branch_state[branch]
        }
        with open(self.state_file, "w", encoding="utf-8") as f:
            yaml.dump(data, f)